Repository: shareAI-lab/learn-claude-code Branch: main Commit: a9c71002d2ca Files: 156 Total size: 1.2 MB Directory structure: gitextract_p2krhmdp/ ├── .github/ │ └── workflows/ │ ├── ci.yml │ └── test.yml ├── .gitignore ├── LICENSE ├── README-ja.md ├── README-zh.md ├── README.md ├── agents/ │ ├── __init__.py │ ├── s01_agent_loop.py │ ├── s02_tool_use.py │ ├── s03_todo_write.py │ ├── s04_subagent.py │ ├── s05_skill_loading.py │ ├── s06_context_compact.py │ ├── s07_task_system.py │ ├── s08_background_tasks.py │ ├── s09_agent_teams.py │ ├── s10_team_protocols.py │ ├── s11_autonomous_agents.py │ ├── s12_worktree_task_isolation.py │ └── s_full.py ├── docs/ │ ├── en/ │ │ ├── s01-the-agent-loop.md │ │ ├── s02-tool-use.md │ │ ├── s03-todo-write.md │ │ ├── s04-subagent.md │ │ ├── s05-skill-loading.md │ │ ├── s06-context-compact.md │ │ ├── s07-task-system.md │ │ ├── s08-background-tasks.md │ │ ├── s09-agent-teams.md │ │ ├── s10-team-protocols.md │ │ ├── s11-autonomous-agents.md │ │ └── s12-worktree-task-isolation.md │ ├── ja/ │ │ ├── s01-the-agent-loop.md │ │ ├── s02-tool-use.md │ │ ├── s03-todo-write.md │ │ ├── s04-subagent.md │ │ ├── s05-skill-loading.md │ │ ├── s06-context-compact.md │ │ ├── s07-task-system.md │ │ ├── s08-background-tasks.md │ │ ├── s09-agent-teams.md │ │ ├── s10-team-protocols.md │ │ ├── s11-autonomous-agents.md │ │ └── s12-worktree-task-isolation.md │ └── zh/ │ ├── s01-the-agent-loop.md │ ├── s02-tool-use.md │ ├── s03-todo-write.md │ ├── s04-subagent.md │ ├── s05-skill-loading.md │ ├── s06-context-compact.md │ ├── s07-task-system.md │ ├── s08-background-tasks.md │ ├── s09-agent-teams.md │ ├── s10-team-protocols.md │ ├── s11-autonomous-agents.md │ └── s12-worktree-task-isolation.md ├── requirements.txt ├── skills/ │ ├── agent-builder/ │ │ ├── SKILL.md │ │ ├── references/ │ │ │ ├── agent-philosophy.md │ │ │ ├── minimal-agent.py │ │ │ ├── subagent-pattern.py │ │ │ └── tool-templates.py │ │ └── scripts/ │ │ └── init_agent.py │ ├── code-review/ │ │ └── 
SKILL.md │ ├── mcp-builder/ │ │ └── SKILL.md │ └── pdf/ │ └── SKILL.md └── web/ ├── .gitignore ├── README.md ├── next.config.ts ├── package.json ├── postcss.config.mjs ├── scripts/ │ └── extract-content.ts ├── src/ │ ├── app/ │ │ ├── [locale]/ │ │ │ ├── (learn)/ │ │ │ │ ├── [version]/ │ │ │ │ │ ├── client.tsx │ │ │ │ │ ├── diff/ │ │ │ │ │ │ ├── diff-content.tsx │ │ │ │ │ │ └── page.tsx │ │ │ │ │ └── page.tsx │ │ │ │ ├── compare/ │ │ │ │ │ └── page.tsx │ │ │ │ ├── layers/ │ │ │ │ │ └── page.tsx │ │ │ │ ├── layout.tsx │ │ │ │ └── timeline/ │ │ │ │ └── page.tsx │ │ │ ├── layout.tsx │ │ │ └── page.tsx │ │ ├── globals.css │ │ └── page.tsx │ ├── components/ │ │ ├── architecture/ │ │ │ ├── arch-diagram.tsx │ │ │ ├── design-decisions.tsx │ │ │ ├── execution-flow.tsx │ │ │ └── message-flow.tsx │ │ ├── code/ │ │ │ └── source-viewer.tsx │ │ ├── diff/ │ │ │ ├── code-diff.tsx │ │ │ └── whats-new.tsx │ │ ├── docs/ │ │ │ └── doc-renderer.tsx │ │ ├── layout/ │ │ │ ├── header.tsx │ │ │ └── sidebar.tsx │ │ ├── simulator/ │ │ │ ├── agent-loop-simulator.tsx │ │ │ ├── simulator-controls.tsx │ │ │ └── simulator-message.tsx │ │ ├── timeline/ │ │ │ └── timeline.tsx │ │ ├── ui/ │ │ │ ├── badge.tsx │ │ │ ├── card.tsx │ │ │ └── tabs.tsx │ │ └── visualizations/ │ │ ├── index.tsx │ │ ├── s01-agent-loop.tsx │ │ ├── s02-tool-dispatch.tsx │ │ ├── s03-todo-write.tsx │ │ ├── s04-subagent.tsx │ │ ├── s05-skill-loading.tsx │ │ ├── s06-context-compact.tsx │ │ ├── s07-task-system.tsx │ │ ├── s08-background-tasks.tsx │ │ ├── s09-agent-teams.tsx │ │ ├── s10-team-protocols.tsx │ │ ├── s11-autonomous-agents.tsx │ │ ├── s12-worktree-task-isolation.tsx │ │ └── shared/ │ │ └── step-controls.tsx │ ├── data/ │ │ ├── annotations/ │ │ │ ├── s01.json │ │ │ ├── s02.json │ │ │ ├── s03.json │ │ │ ├── s04.json │ │ │ ├── s05.json │ │ │ ├── s06.json │ │ │ ├── s07.json │ │ │ ├── s08.json │ │ │ ├── s09.json │ │ │ ├── s10.json │ │ │ ├── s11.json │ │ │ └── s12.json │ │ ├── execution-flows.ts │ │ ├── generated/ │ │ │ ├── 
docs.json │ │ │ └── versions.json │ │ └── scenarios/ │ │ ├── s01.json │ │ ├── s02.json │ │ ├── s03.json │ │ ├── s04.json │ │ ├── s05.json │ │ ├── s06.json │ │ ├── s07.json │ │ ├── s08.json │ │ ├── s09.json │ │ ├── s10.json │ │ ├── s11.json │ │ └── s12.json │ ├── hooks/ │ │ ├── useDarkMode.ts │ │ ├── useSimulator.ts │ │ └── useSteppedVisualization.ts │ ├── i18n/ │ │ └── messages/ │ │ ├── en.json │ │ ├── ja.json │ │ └── zh.json │ ├── lib/ │ │ ├── constants.ts │ │ ├── i18n-server.ts │ │ ├── i18n.tsx │ │ └── utils.ts │ └── types/ │ └── agent-data.ts ├── tsconfig.json └── vercel.json ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/ci.yml ================================================ name: CI on: push: branches: [main] pull_request: branches: [main] jobs: build: runs-on: ubuntu-latest defaults: run: working-directory: web steps: - uses: actions/checkout@v6 - uses: actions/setup-node@v6 with: node-version: 20 cache: npm cache-dependency-path: web/package-lock.json - name: Install dependencies run: npm ci - name: Type check run: npx tsc --noEmit - name: Build run: npm run build ================================================ FILE: .github/workflows/test.yml ================================================ name: Test on: push: branches: [main] pull_request: branches: [main] jobs: unit-test: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: "3.11" - name: Install dependencies run: pip install anthropic python-dotenv - name: Run unit tests run: python tests/test_unit.py session-test: runs-on: ubuntu-latest strategy: matrix: session: [v0, v1, v2, v3, v4, v5, v6, v7, v8a, v8b, v8c, v9] steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: python-version: "3.11" - name: Install dependencies run: pip install anthropic 
python-dotenv - name: Run ${{ matrix.session }} tests env: TEST_API_KEY: ${{ secrets.TEST_API_KEY }} TEST_BASE_URL: ${{ secrets.TEST_BASE_URL }} TEST_MODEL: ${{ secrets.TEST_MODEL }} run: python tests/test_${{ matrix.session }}.py web-build: runs-on: ubuntu-latest defaults: run: working-directory: web steps: - uses: actions/checkout@v6 - name: Set up Node.js uses: actions/setup-node@v6 with: node-version: "20" cache: "npm" cache-dependency-path: web/package-lock.json - name: Install dependencies run: npm ci - name: Build run: npm run build ================================================ FILE: .gitignore ================================================ # Byte-compiled / optimized / DLL files __pycache__/ *.py[codz] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ /lib/ /lib64/ parts/ sdist/ var/ wheels/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py.cover .hypothesis/ .pytest_cache/ cover/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder .pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: # .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
# However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # UV # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. #uv.lock # poetry # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. # This is especially recommended for binary packages to ensure reproducibility, and is more # commonly ignored for libraries. # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control #poetry.lock #poetry.toml # pdm # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. # https://pdm-project.org/en/latest/usage/project/#working-with-version-control #pdm.lock #pdm.toml .pdm-python .pdm-build/ # pixi # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. #pixi.lock # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one # in the .venv directory. It is recommended not to include this directory in version control. .pixi # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .envrc .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # Cython debug symbols cython_debug/ # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ # Abstra # Abstra is an AI-powered process automation framework. # Ignore directories containing user credentials, local state, and settings. # Learn more at https://abstra.io/docs .abstra/ # Visual Studio Code # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore # and can be added to the global gitignore or merged into this file. However, if you prefer, # you could uncomment the following to ignore the entire vscode folder # .vscode/ # Transcripts (generated by compression agent) .transcripts/ # Runtime artifacts (generated by agent tests) .task_outputs/ .tasks/ .teams/ # Ruff stuff: .ruff_cache/ # PyPI configuration file .pypirc # Cursor # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to # exclude from AI features like autocomplete and code analysis. 
Recommended for sensitive data # refer to https://docs.cursor.com/context/ignore-files .cursorignore .cursorindexingignore # Marimo marimo/_static/ marimo/_lsp/ __marimo__/ # Web app web/node_modules/ web/.next/ web/out/ .vercel .env*.local test_providers.py # Internal analysis artifacts (not learning material) analysis/ analysis_progress.md ================================================ FILE: LICENSE ================================================ MIT License Copyright (c) 2024 shareAI Lab Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
================================================ FILE: README-ja.md ================================================ # Learn Claude Code -- 真の Agent のための Harness Engineering [English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md) ## モデルこそが Agent である コードの話をする前に、一つだけ明確にしておく。 **Agent とはモデルのことだ。フレームワークではない。プロンプトチェーンではない。ドラッグ&ドロップのワークフローではない。** ### Agent とは何か Agent とはニューラルネットワークである -- Transformer、RNN、学習された関数 -- 数十億回の勾配更新を経て、行動系列データの上で環境を知覚し、目標を推論し、行動を起こすことを学んだもの。AI における "Agent" という言葉は、始まりからずっとこの意味だった。常に。 人間も Agent だ。数百万年の進化的訓練によって形作られた生物的ニューラルネットワーク。感覚で世界を知覚し、脳で推論し、身体で行動する。DeepMind、OpenAI、Anthropic が "Agent" と言うとき、それはこの分野が誕生以来ずっと意味してきたものと同じだ:**行動することを学んだモデル。** 歴史がその証拠を刻んでいる: - **2013 -- DeepMind DQN が Atari をプレイ。** 単一のニューラルネットワークが、生のピクセルとスコアだけを受け取り、7 つの Atari 2600 ゲームを学習 -- すべての先行アルゴリズムを超え、3 つで人間の専門家を打ち負かした。2015 年には同じアーキテクチャが [49 ゲームに拡張され、プロのテスターに匹敵](https://www.nature.com/articles/nature14236)、*Nature* に掲載。ゲーム固有のルールなし。決定木なし。一つのモデルが経験から学んだ。そのモデルが Agent だった。 - **2019 -- OpenAI Five が Dota 2 を制覇。** 5 つのニューラルネットワークが 10 ヶ月間で [45,000 年分の Dota 2](https://openai.com/index/openai-five-defeats-dota-2-world-champions/) を自己対戦し、サンフランシスコのライブストリームで **OG** -- TI8 世界王者 -- を 2-0 で撃破。その後の公開アリーナでは 42,729 試合で勝率 99.4%。スクリプト化された戦略なし。メタプログラムされたチーム連携なし。モデルが完全に自己対戦を通じてチームワーク、戦術、リアルタイム適応を学んだ。 - **2019 -- DeepMind AlphaStar が StarCraft II をマスター。** AlphaStar は非公開戦で[プロ選手を 10-1 で撃破](https://deepmind.google/blog/alphastar-mastering-the-real-time-strategy-game-starcraft-ii/)、その後ヨーロッパサーバーで[グランドマスター到達](https://www.nature.com/articles/d41586-019-03298-6) -- 90,000 人中の上位 0.15%。不完全情報、リアルタイム判断、チェスや囲碁を遥かに凌駕する組合せ的行動空間を持つゲーム。Agent とは? 
モデルだ。訓練されたもの。スクリプトではない。 - **2019 -- Tencent 絶悟が王者栄耀を支配。** Tencent AI Lab の「絶悟」は 2019 年 8 月 2 日、世界チャンピオンカップで [KPL プロ選手を 5v5 で撃破](https://www.jiemian.com/article/3371171.html)。1v1 モードではプロが [15 戦中 1 勝のみ、8 分以上生存不可](https://developer.aliyun.com/article/851058)。訓練強度:1 日 = 人間の 440 年。2021 年までに全ヒーロープールで KPL プロを全面的に上回った。手書きのヒーロー相性表なし。スクリプト化されたチーム編成なし。自己対戦でゲーム全体をゼロから学んだモデル。 - **2024-2025 -- LLM Agent がソフトウェアエンジニアリングを再構築。** Claude、GPT、Gemini -- 人類のコードと推論の全幅で訓練された大規模言語モデル -- がコーディング Agent として展開される。コードベースを読み、実装を書き、障害をデバッグし、チームで協調する。アーキテクチャは先行するすべての Agent と同一:訓練されたモデルが環境に配置され、知覚と行動のツールを与えられる。唯一の違いは、学んだものの規模と解くタスクの汎用性。 すべてのマイルストーンが同じ真理を共有している:**"Agent" は決して周囲のコードではない。Agent は常にモデルそのものだ。** ### Agent ではないもの "Agent" という言葉は、プロンプト配管工の産業全体に乗っ取られてしまった。 ドラッグ&ドロップのワークフロービルダー。ノーコード "AI Agent" プラットフォーム。プロンプトチェーン・オーケストレーションライブラリ。すべて同じ幻想を共有している:LLM API 呼び出しを if-else 分岐、ノードグラフ、ハードコードされたルーティングロジックで繋ぎ合わせることが "Agent の構築" だと。 違う。彼らが作ったものはルーブ・ゴールドバーグ・マシンだ -- 過剰に設計された脆い手続き的ルールのパイプライン。LLM は美化されたテキスト補完ノードとして押し込まれているだけ。それは Agent ではない。壮大な妄想を持つシェルスクリプトだ。 **プロンプト配管工式 "Agent" は、モデルを訓練しないプログラマーの妄想だ。** 手続き的ロジックを積み重ねて知能を力技で再現しようとする -- 巨大なルールツリー、ノードグラフ、チェーン・プロンプトの滝 -- そして十分なグルーコードがいつか自律的振る舞いを創発すると祈る。しない。工学的手段で Agency をコーディングすることはできない。Agency は学習されるものであって、プログラムされるものではない。 あのシステムたちは生まれた瞬間から死んでいる:脆弱で、スケールせず、汎化が根本的に不可能。GOFAI(Good Old-Fashioned AI、古典的記号 AI)の現代版だ -- 何十年も前に学術界が放棄した記号ルールシステムが、LLM のペンキを塗り直して再登場した。パッケージが違うだけで、同じ袋小路。 ### マインドシフト:「Agent を開発する」から Harness を開発する へ 「Agent を開発しています」と言うとき、意味できるのは二つだけだ: **1. モデルを訓練する。** 強化学習、ファインチューニング、RLHF、その他の勾配ベースの手法で重みを調整する。タスクプロセスデータ -- 実ドメインにおける知覚・推論・行動の実際の系列 -- を収集し、モデルの振る舞いを形成する。DeepMind、OpenAI、Tencent AI Lab、Anthropic が行っていること。これが最も本来的な Agent 開発。 **2. 
Harness を構築する。** モデルに動作環境を提供するコードを書く。私たちの大半が行っていることであり、このリポジトリの核心。 Harness とは、Agent が特定のドメインで機能するために必要なすべて: ``` Harness = Tools + Knowledge + Observation + Action Interfaces + Permissions Tools: ファイル I/O、シェル、ネットワーク、データベース、ブラウザ Knowledge: 製品ドキュメント、ドメイン資料、API 仕様、スタイルガイド Observation: git diff、エラーログ、ブラウザ状態、センサーデータ Action: CLI コマンド、API 呼び出し、UI インタラクション Permissions: サンドボックス、承認ワークフロー、信頼境界 ``` モデルが決断する。Harness が実行する。モデルが推論する。Harness がコンテキストを提供する。モデルはドライバー。Harness は車両。 **コーディング Agent の Harness は IDE、ターミナル、ファイルシステム。** 農業 Agent の Harness はセンサーアレイ、灌漑制御、気象データフィード。ホテル Agent の Harness は予約システム、ゲストコミュニケーションチャネル、施設管理 API。Agent -- 知性、意思決定者 -- は常にモデル。Harness はドメインごとに変わる。Agent はドメインを超えて汎化する。 このリポジトリは車両の作り方を教える。コーディング用の車両だ。だが設計パターンはあらゆるドメインに汎化する:農場管理、ホテル運営、工場製造、物流、医療、教育、科学研究。タスクが知覚され、推論され、実行される必要がある場所ならどこでも -- Agent には Harness が要る。 ### Harness エンジニアの仕事 このリポジトリを読んでいるなら、あなたはおそらく Harness エンジニアだ -- それは強力なアイデンティティ。以下があなたの本当の仕事: - **ツールの実装。** Agent に手を与える。ファイル読み書き、シェル実行、API 呼び出し、ブラウザ制御、データベースクエリ。各ツールは Agent が環境内で取れる行動。原子的で、組み合わせ可能で、記述が明確であるように設計する。 - **知識のキュレーション。** Agent にドメイン専門性を与える。製品ドキュメント、アーキテクチャ決定記録、スタイルガイド、規制要件。オンデマンドで読み込み(s05)、前もって詰め込まない。Agent は何が利用可能か知った上で、必要なものを自ら取得すべき。 - **コンテキストの管理。** Agent にクリーンな記憶を与える。サブ Agent 隔離(s04)がノイズの漏洩を防ぐ。コンテキスト圧縮(s06)が履歴の氾濫を防ぐ。タスクシステム(s07)が目標を単一の会話を超えて永続化する。 - **権限の制御。** Agent に境界を与える。ファイルアクセスのサンドボックス化。破壊的操作への承認要求。Agent と外部システム間の信頼境界の実施。安全工学と Harness 工学の交差点。 - **タスクプロセスデータの収集。** Agent があなたの Harness 内で実行するすべての行動系列は訓練シグナル。実デプロイメントの知覚-推論-行動トレースは、次世代 Agent モデルをファインチューニングする原材料。あなたの Harness は Agent に仕えるだけでなく -- Agent を進化させる助けにもなる。 あなたは知性を書いているのではない。知性が住まう世界を構築している。その世界の品質 -- Agent がどれだけ明瞭に知覚でき、どれだけ正確に行動でき、利用可能な知識がどれだけ豊かか -- が、知性がどれだけ効果的に自らを表現できるかを直接決定する。 **優れた Harness を作れ。Agent が残りをやる。** ### なぜ Claude Code か -- Harness Engineering の大師範 なぜこのリポジトリは特に Claude Code を解剖するのか? 
Claude Code は私たちが見てきた中で最もエレガントで完成度の高い Agent Harness だからだ。単一の巧妙なトリックのためではなく、それが *しないこと* のために:Agent そのものになろうとしない。硬直的なワークフローを押し付けない。精緻な決定木でモデルの判断を疑って上書きしない。ツール、知識、コンテキスト管理、権限境界をモデルに提供し -- そして道を譲る。 Claude Code の本質を剥き出しにすると: ``` Claude Code = 一つの agent loop + ツール (bash, read, write, edit, glob, grep, browser...) + オンデマンド skill ロード + コンテキスト圧縮 + サブ Agent スポーン + 依存グラフ付きタスクシステム + 非同期メールボックスによるチーム協調 + worktree 分離による並列実行 + 権限ガバナンス ``` これがすべてだ。これが全アーキテクチャ。すべてのコンポーネントは Harness メカニズム -- Agent が住む世界の一部。Agent そのものは? Claude だ。モデル。Anthropic が人類の推論とコードの全幅で訓練した。Harness が Claude を賢くしたのではない。Claude は元々賢い。Harness が Claude に手と目とワークスペースを与えた。 これが Claude Code が理想的な教材である理由だ:**モデルを信頼し、工学的努力を Harness に集中させるとどうなるかを示している。** このリポジトリの各セッション(s01-s12)は Claude Code アーキテクチャから一つの Harness メカニズムをリバースエンジニアリングする。終了時には、Claude Code の仕組みだけでなく、あらゆるドメインのあらゆる Agent に適用される Harness 工学の普遍的原則を理解している。 教訓は「Claude Code をコピーせよ」ではない。教訓は:**最高の Agent プロダクトは、自分の仕事が Harness であって Intelligence ではないと理解しているエンジニアが作る。** --- ## ビジョン:宇宙を本物の Agent で満たす これはコーディング Agent だけの話ではない。 人間が複雑で多段階の判断集約的な仕事をしているすべてのドメインは、Agent が稼働できるドメインだ -- 正しい Harness さえあれば。このリポジトリのパターンは普遍的だ: ``` 不動産管理 Agent = モデル + 物件センサー + メンテナンスツール + テナント通信 農業 Agent = モデル + 土壌/気象データ + 灌漑制御 + 作物知識 ホテル運営 Agent = モデル + 予約システム + ゲストチャネル + 施設 API 医学研究 Agent = モデル + 文献検索 + 実験機器 + プロトコル文書 製造 Agent = モデル + 生産ラインセンサー + 品質管理 + 物流 教育 Agent = モデル + カリキュラム知識 + 学生進捗 + 評価ツール ``` ループは常に同じ。ツールが変わる。知識が変わる。権限が変わる。Agent -- モデル -- がすべてを汎化する。 このリポジトリを読むすべての Harness エンジニアは、ソフトウェアエンジニアリングを遥かに超えたパターンを学んでいる。知的で自動化された未来のためのインフラストラクチャを構築することを学んでいる。実ドメインにデプロイされた優れた Harness の一つ一つが、Agent が知覚し、推論し、行動できる新たな拠点。 まずワークショップを満たす。次に農場、病院、工場。次に都市。次に惑星。 **Bash is all you need. Real agents are all the universe needs.** --- ``` THE AGENT PATTERN ================= User --> messages[] --> LLM --> response | stop_reason == "tool_use"? 
/ \ yes no | | execute tools return text append results loop back -----------------> messages[] 最小ループ。すべての AI Agent にこのループが必要だ。 モデルがツール呼び出しと停止を決める。 コードはモデルの要求を実行するだけ。 このリポジトリはこのループを囲むすべて -- Agent を特定ドメインで効果的にする Harness -- の作り方を教える。 ``` **12 の段階的セッション、シンプルなループから分離された自律実行まで。** **各セッションは 1 つの Harness メカニズムを追加する。各メカニズムには 1 つのモットーがある。** > **s01**   *"One loop & Bash is all you need"* — 1つのツール + 1つのループ = エージェント > > **s02**   *"ツールを足すなら、ハンドラーを1つ足すだけ"* — ループは変わらない。新ツールは dispatch map に登録するだけ > > **s03**   *"計画のないエージェントは行き当たりばったり"* — まずステップを書き出し、それから実行 > > **s04**   *"大きなタスクを分割し、各サブタスクにクリーンなコンテキストを"* — サブエージェントは独立した messages[] を使い、メイン会話を汚さない > > **s05**   *"必要な知識を、必要な時に読み込む"* — system prompt ではなく tool_result で注入 > > **s06**   *"コンテキストはいつか溢れる、空ける手段が要る"* — 3層圧縮で無限セッションを実現 > > **s07**   *"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する"* — ファイルベースのタスクグラフ、マルチエージェント協調の基盤 > > **s08**   *"遅い操作はバックグラウンドへ、エージェントは次を考え続ける"* — デーモンスレッドがコマンド実行、完了後に通知を注入 > > **s09**   *"一人で終わらないなら、チームメイトに任せる"* — 永続チームメイト + 非同期メールボックス > > **s10**   *"チームメイト間には統一の通信ルールが必要"* — 1つの request-response パターンが全交渉を駆動 > > **s11**   *"チームメイトが自らボードを見て、仕事を取る"* — リーダーが逐一割り振る必要はない > > **s12**   *"各自のディレクトリで作業し、互いに干渉しない"* — タスクは目標を管理、worktree はディレクトリを管理、IDで紐付け --- ## コアパターン ```python def agent_loop(messages): while True: response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] for block in response.content: if block.type == "tool_use": output = TOOL_HANDLERS[block.name](**block.input) results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) messages.append({"role": "user", "content": results}) ``` 各セッションはこのループの上に 1 つの Harness メカニズムを重ねる -- ループ自体は変わらない。ループは Agent のもの。メカニズムは Harness のもの。 ## スコープ (重要) このリポジトリは Harness 工学の 0->1 学習プロジェクト -- Agent モデルを囲む環境の構築を学ぶ。 学習を優先するため、以下の本番メカニズムは意図的に簡略化または省略している: - 完全なイベント / Hook バス (例: PreToolUse, 
SessionStart/End, ConfigChange)。 s12 では教材用に最小の追記型ライフサイクルイベントのみ実装。 - ルールベースの権限ガバナンスと信頼フロー - セッションライフサイクル制御 (resume/fork) と高度な worktree ライフサイクル制御 - MCP ランタイムの詳細 (transport/OAuth/リソース購読/ポーリング) このリポジトリの JSONL メールボックス方式は教材用の実装であり、特定の本番内部実装を主張するものではない。 ## クイックスタート ```sh git clone https://github.com/shareAI-lab/learn-claude-code cd learn-claude-code pip install -r requirements.txt cp .env.example .env # .env を編集して ANTHROPIC_API_KEY を入力 python agents/s01_agent_loop.py # ここから開始 python agents/s12_worktree_task_isolation.py # 全セッションの到達点 python agents/s_full.py # 総括: 全メカニズム統合 ``` ### Web プラットフォーム インタラクティブな可視化、ステップスルーアニメーション、ソースビューア、各セッションのドキュメント。 ```sh cd web && npm install && npm run dev # http://localhost:3000 ``` ## 学習パス ``` フェーズ1: ループ フェーズ2: 計画と知識 ================== ============================== s01 エージェントループ [1] s03 TodoWrite [5] while + stop_reason TodoManager + nag リマインダー | | +-> s02 Tool Use [4] s04 サブエージェント [5] dispatch map: name->handler 子ごとに新しい messages[] | s05 Skills [5] SKILL.md を tool_result で注入 | s06 Context Compact [5] 3層コンテキスト圧縮 フェーズ3: 永続化 フェーズ4: チーム ================== ===================== s07 タスクシステム [8] s09 エージェントチーム [9] ファイルベース CRUD + 依存グラフ チームメイト + JSONL メールボックス | | s08 バックグラウンドタスク [6] s10 チームプロトコル [12] デーモンスレッド + 通知キュー シャットダウン + プラン承認 FSM | s11 自律エージェント [14] アイドルサイクル + 自動クレーム | s12 Worktree 分離 [16] タスク調整 + 必要時の分離実行レーン [N] = ツール数 ``` ## プロジェクト構成 ``` learn-claude-code/ | |-- agents/ # Python リファレンス実装 (s01-s12 + s_full 総括) |-- docs/{en,zh,ja}/ # メンタルモデル優先のドキュメント (3言語) |-- web/ # インタラクティブ学習プラットフォーム (Next.js) |-- skills/ # s05 の Skill ファイル +-- .github/workflows/ci.yml # CI: 型チェック + ビルド ``` ## ドキュメント メンタルモデル優先: 問題、解決策、ASCII図、最小限のコード。 [English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/ja/) | セッション | トピック | モットー | |-----------|---------|---------| | [s01](./docs/ja/s01-the-agent-loop.md) | エージェントループ | *One loop & Bash is all you need* | | [s02](./docs/ja/s02-tool-use.md) | Tool Use | *ツールを足すなら、ハンドラーを1つ足すだけ* | | [s03](./docs/ja/s03-todo-write.md) | 
TodoWrite | *計画のないエージェントは行き当たりばったり* | | [s04](./docs/ja/s04-subagent.md) | サブエージェント | *大きなタスクを分割し、各サブタスクにクリーンなコンテキストを* | | [s05](./docs/ja/s05-skill-loading.md) | Skills | *必要な知識を、必要な時に読み込む* | | [s06](./docs/ja/s06-context-compact.md) | Context Compact | *コンテキストはいつか溢れる、空ける手段が要る* | | [s07](./docs/ja/s07-task-system.md) | タスクシステム | *大きな目標を小タスクに分解し、順序付けし、ディスクに記録する* | | [s08](./docs/ja/s08-background-tasks.md) | バックグラウンドタスク | *遅い操作はバックグラウンドへ、エージェントは次を考え続ける* | | [s09](./docs/ja/s09-agent-teams.md) | エージェントチーム | *一人で終わらないなら、チームメイトに任せる* | | [s10](./docs/ja/s10-team-protocols.md) | チームプロトコル | *チームメイト間には統一の通信ルールが必要* | | [s11](./docs/ja/s11-autonomous-agents.md) | 自律エージェント | *チームメイトが自らボードを見て、仕事を取る* | | [s12](./docs/ja/s12-worktree-task-isolation.md) | Worktree + タスク分離 | *各自のディレクトリで作業し、互いに干渉しない* | ## 次のステップ -- 理解から出荷へ 12 セッションを終えれば、Harness 工学の内部構造を完全に理解している。その知識を活かす 2 つの方法: ### Kode Agent CLI -- オープンソース Coding Agent CLI > `npm i -g @shareai-lab/kode` Skill & LSP 対応、Windows 対応、GLM / MiniMax / DeepSeek 等のオープンモデルに接続可能。インストールしてすぐ使える。 GitHub: **[shareAI-lab/Kode-cli](https://github.com/shareAI-lab/Kode-cli)** ### Kode Agent SDK -- アプリにエージェント機能を埋め込む 公式 Claude Code Agent SDK は内部で完全な CLI プロセスと通信する -- 同時ユーザーごとに独立のターミナルプロセスが必要。Kode SDK は独立ライブラリでユーザーごとのプロセスオーバーヘッドがなく、バックエンド、ブラウザ拡張、組み込みデバイス等に埋め込み可能。 GitHub: **[shareAI-lab/Kode-agent-sdk](https://github.com/shareAI-lab/Kode-agent-sdk)** --- ## 姉妹教材: *オンデマンドセッション*から*常時稼働アシスタント*へ 本リポジトリが教える Harness は **使い捨て型** -- ターミナルを開き、Agent にタスクを与え、終わったら閉じる。次のセッションは白紙から始まる。Claude Code のモデル。 [OpenClaw](https://github.com/openclaw/openclaw) は別の可能性を証明した: 同じ agent core の上に 2 つの Harness メカニズムを追加するだけで、Agent は「突かないと動かない」から「30 秒ごとに自分で起きて仕事を探す」に変わる: - **ハートビート** -- 30 秒ごとに Harness が Agent にメッセージを送り、やることがあるか確認させる。なければスリープ続行、あれば即座に行動。 - **Cron** -- Agent が自ら未来のタスクをスケジュールし、時間が来たら自動実行。 さらにマルチチャネル IM ルーティング (WhatsApp / Telegram / Slack / Discord 等 13+ プラットフォーム)、永続コンテキストメモリ、Soul パーソナリティシステムを加えると、Agent は使い捨てツールから常時稼働のパーソナル AI アシスタントへ変貌する。 
**[claw0](https://github.com/shareAI-lab/claw0)** はこれらの Harness メカニズムをゼロから分解する姉妹教材リポジトリ: ``` claw agent = agent core + heartbeat + cron + IM chat + memory + soul ``` ``` learn-claude-code claw0 (agent harness コア: (能動的な常時稼働 harness: ループ、ツール、計画、 ハートビート、cron、IM チャネル、 チーム、worktree 分離) メモリ、Soul パーソナリティ) ``` ## ライセンス MIT --- **モデルが Agent だ。コードは Harness だ。優れた Harness を作れ。Agent が残りをやる。** **Bash is all you need. Real agents are all the universe needs.** ================================================ FILE: README-zh.md ================================================ # Learn Claude Code -- 真正的 Agent Harness 工程 [English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md) ## 模型就是 Agent 在讨论代码之前,先把一件事彻底说清楚。 **Agent 是模型。不是框架。不是提示词链。不是拖拽式工作流。** ### Agent 到底是什么 Agent 是一个神经网络 -- Transformer、RNN、一个被训练出来的函数 -- 经过数十亿次梯度更新,在行动序列数据上学会了感知环境、推理目标、采取行动。"Agent" 这个词在 AI 领域从诞生之日起就是这个意思。从来都是。 人类就是 agent。一个由数百万年进化训练出来的生物神经网络,通过感官感知世界,通过大脑推理,通过身体行动。当 DeepMind、OpenAI 或 Anthropic 说 "agent" 时,他们说的和这个领域自诞生以来就一直在说的完全一样:**一个学会了行动的模型。** 历史已经写好了铁证: - **2013 -- DeepMind DQN 玩 Atari。** 一个神经网络,只接收原始像素和游戏分数,学会了 7 款 Atari 2600 游戏 -- 超越所有先前算法,在其中 3 款上击败人类专家。到 2015 年,同一架构扩展到 [49 款游戏,达到职业人类测试员水平](https://www.nature.com/articles/nature14236),论文发表在 *Nature*。没有游戏专属规则。没有决策树。一个模型,从经验中学习。那个模型就是 agent。 - **2019 -- OpenAI Five 征服 Dota 2。** 五个神经网络,在 10 个月内与自己对战了 [45,000 年的 Dota 2](https://openai.com/index/openai-five-defeats-dota-2-world-champions/),在旧金山直播赛上 2-0 击败了 **OG** -- TI8 世界冠军。随后的公开竞技场中,AI 在 42,729 场比赛中胜率 99.4%。没有脚本化的策略。没有元编程的团队协调逻辑。模型完全通过自我对弈学会了团队协作、战术和实时适应。 - **2019 -- DeepMind AlphaStar 制霸星际争霸 II。** AlphaStar 在闭门赛中 [10-1 击败职业选手](https://deepmind.google/blog/alphastar-mastering-the-real-time-strategy-game-starcraft-ii/),随后在欧洲服务器上达到[宗师段位](https://www.nature.com/articles/d41586-019-03298-6) -- 90,000 名玩家中的前 0.15%。一个信息不完全、实时决策、组合动作空间远超国际象棋和围棋的游戏。Agent 是什么?是模型。训练出来的。不是编出来的。 - **2019 -- 腾讯绝悟统治王者荣耀。** 腾讯 AI Lab 的 "绝悟" 于 2019 年 8 月 2 日世冠杯半决赛上[以 5v5 击败 KPL 
职业选手](https://www.jiemian.com/article/3371171.html)。在 1v1 模式下,职业选手 [15 场只赢 1 场,最多坚持不到 8 分钟](https://developer.aliyun.com/article/851058)。训练强度:一天等于人类 440 年。到 2021 年,绝悟在全英雄池 BO5 上全面超越 KPL 职业选手水准。没有手工编写的英雄克制表。没有脚本化的阵容编排。一个从零开始通过自我对弈学习整个游戏的模型。 - **2024-2025 -- LLM Agent 重塑软件工程。** Claude、GPT、Gemini -- 在人类全部代码和推理上训练的大语言模型 -- 被部署为编程 agent。它们阅读代码库,编写实现,调试故障,团队协作。架构与之前每一个 agent 完全相同:一个训练好的模型,放入一个环境,给予感知和行动的工具。唯一的不同是它们学到的东西的规模和解决任务的通用性。 每一个里程碑都共享同一个真理:**"Agent" 从来都不是外面那层代码。Agent 永远是模型本身。** ### Agent 不是什么 "Agent" 这个词已经被一整个提示词水管工产业劫持了。 拖拽式工作流构建器。无代码 "AI Agent" 平台。提示词链编排库。它们共享同一个幻觉:把 LLM API 调用用 if-else 分支、节点图、硬编码路由逻辑串在一起就算是 "构建 Agent" 了。 不是的。它们做出来的东西是鲁布·戈德堡机械 -- 一个过度工程化的、脆弱的过程式规则流水线,LLM 被楔在里面当一个美化了的文本补全节点。那不是 Agent。那是一个有着宏大妄想的 shell 脚本。 **提示词水管工式 "Agent" 是不做模型的程序员的意淫。** 他们试图通过堆叠过程式逻辑来暴力模拟智能 -- 庞大的规则树、节点图、链式提示词瀑布流 -- 然后祈祷足够多的胶水代码能涌现出自主行为。不会的。你不可能通过工程手段编码出 agency。Agency 是学出来的,不是编出来的。 那些系统从诞生之日起就已经死了:脆弱、不可扩展、根本不具备泛化能力。它们是 GOFAI(Good Old-Fashioned AI,经典符号 AI)的现代还魂 -- 几十年前就被学界抛弃的符号规则系统,现在喷了一层 LLM 的漆又登场了。换了个包装,同一条死路。 ### 心智转换:从 "开发 Agent" 到开发 Harness 当一个人说 "我在开发 Agent" 时,他只可能是两个意思之一: **1. 训练模型。** 通过强化学习、微调、RLHF 或其他基于梯度的方法调整权重。收集任务过程数据 -- 真实领域中感知、推理、行动的实际序列 -- 用它们来塑造模型的行为。这是 DeepMind、OpenAI、腾讯 AI Lab、Anthropic 在做的事。这是最本义的 Agent 开发。 **2. 
构建 Harness。** 编写代码,为模型提供一个可操作的环境。这是我们大多数人在做的事,也是本仓库的核心。 Harness 是 agent 在特定领域工作所需要的一切: ``` Harness = Tools + Knowledge + Observation + Action Interfaces + Permissions Tools: 文件读写、Shell、网络、数据库、浏览器 Knowledge: 产品文档、领域资料、API 规范、风格指南 Observation: git diff、错误日志、浏览器状态、传感器数据 Action: CLI 命令、API 调用、UI 交互 Permissions: 沙箱隔离、审批流程、信任边界 ``` 模型做决策。Harness 执行。模型做推理。Harness 提供上下文。模型是驾驶者。Harness 是载具。 **编程 agent 的 harness 是它的 IDE、终端和文件系统。** 农业 agent 的 harness 是传感器阵列、灌溉控制和气象数据。酒店 agent 的 harness 是预订系统、客户沟通渠道和设施管理 API。Agent -- 那个智能、那个决策者 -- 永远是模型。Harness 因领域而变。Agent 跨领域泛化。 这个仓库教你造载具。编程用的载具。但设计模式可以泛化到任何领域:庄园管理、农田运营、酒店运作、工厂制造、物流调度、医疗保健、教育培训、科学研究。只要有一个任务需要被感知、推理和执行 -- agent 就需要一个 harness。 ### Harness 工程师到底在做什么 如果你在读这个仓库,你很可能是一名 harness 工程师 -- 这是一个强大的身份。以下是你真正的工作: - **实现工具。** 给 agent 一双手。文件读写、Shell 执行、API 调用、浏览器控制、数据库查询。每个工具都是 agent 在环境中可以采取的一个行动。设计它们时要原子化、可组合、描述清晰。 - **策划知识。** 给 agent 领域专长。产品文档、架构决策记录、风格指南、合规要求。按需加载(s05),不要前置塞入。Agent 应该知道有什么可用,然后自己拉取所需。 - **管理上下文。** 给 agent 干净的记忆。子 agent 隔离(s04)防止噪声泄露。上下文压缩(s06)防止历史淹没。任务系统(s07)让目标持久化到单次对话之外。 - **控制权限。** 给 agent 边界。沙箱化文件访问。对破坏性操作要求审批。在 agent 和外部系统之间实施信任边界。这是安全工程与 harness 工程的交汇点。 - **收集任务过程数据。** Agent 在你的 harness 中执行的每一条行动序列都是训练信号。真实部署中的感知-推理-行动轨迹是微调下一代 agent 模型的原材料。你的 harness 不仅服务于 agent -- 它还可以帮助进化 agent。 你不是在编写智能。你是在构建智能栖居的世界。这个世界的质量 -- agent 能看得多清楚、行动得多精准、可用知识有多丰富 -- 直接决定了智能能多有效地表达自己。 **造好 Harness。Agent 会完成剩下的。** ### 为什么是 Claude Code -- Harness 工程的大师课 为什么这个仓库专门拆解 Claude Code? 因为 Claude Code 是我们所见过的最优雅、最完整的 agent harness 实现。不是因为某个巧妙的技巧,而是因为它 *没做* 的事:它没有试图成为 agent 本身。它没有强加僵化的工作流。它没有用精心设计的决策树去替模型做判断。它给模型提供了工具、知识、上下文管理和权限边界 -- 然后让开了。 把 Claude Code 剥到本质来看: ``` Claude Code = 一个 agent loop + 工具 (bash, read, write, edit, glob, grep, browser...) 
+ 按需 skill 加载 + 上下文压缩 + 子 agent 派生 + 带依赖图的任务系统 + 异步邮箱的团队协调 + worktree 隔离的并行执行 + 权限治理 ``` 就这些。这就是全部架构。每一个组件都是 harness 机制 -- 为 agent 构建的栖居世界的一部分。Agent 本身呢?是 Claude。一个模型。由 Anthropic 在人类推理和代码的全部广度上训练而成。Harness 没有让 Claude 变聪明。Claude 本来就聪明。Harness 给了 Claude 双手、双眼和一个工作空间。 这就是 Claude Code 作为教学标本的意义:**它展示了当你信任模型、把工程精力集中在 harness 上时会发生什么。** 本仓库的每一个课程(s01-s12)都在逆向工程 Claude Code 架构中的一个 harness 机制。学完之后,你理解的不只是 Claude Code 怎么工作,而是适用于任何领域、任何 agent 的 harness 工程通用原则。 启示不是 "复制 Claude Code"。启示是:**最好的 agent 产品,出自那些明白自己的工作是 harness 而非 intelligence 的工程师之手。** --- ## 愿景:用真正的 Agent 铺满宇宙 这不只关乎编程 agent。 每一个人类从事复杂、多步骤、需要判断力的工作的领域,都是 agent 可以运作的领域 -- 只要有对的 harness。本仓库中的模式是通用的: ``` 庄园管理 agent = 模型 + 物业传感器 + 维护工具 + 租户通信 农业 agent = 模型 + 土壤/气象数据 + 灌溉控制 + 作物知识 酒店运营 agent = 模型 + 预订系统 + 客户渠道 + 设施 API 医学研究 agent = 模型 + 文献检索 + 实验仪器 + 协议文档 制造业 agent = 模型 + 产线传感器 + 质量控制 + 物流系统 教育 agent = 模型 + 课程知识 + 学生进度 + 评估工具 ``` 循环永远不变。工具在变。知识在变。权限在变。Agent -- 那个模型 -- 泛化一切。 每一个读这个仓库的 harness 工程师都在学习远超软件工程的模式。你在学习为一个智能的、自动化的未来构建基础设施。每一个部署在真实领域的好 harness,都是 agent 能够感知、推理、行动的又一个阵地。 先铺满工作室。然后是农田、医院、工厂。然后是城市。然后是星球。 **Bash is all you need. Real agents are all the universe needs.** --- ``` THE AGENT PATTERN ================= User --> messages[] --> LLM --> response | stop_reason == "tool_use"? 
/ \ yes no | | execute tools return text append results loop back -----------------> messages[] 这是最小循环。每个 AI Agent 都需要这个循环。 模型决定何时调用工具、何时停止。 代码只是执行模型的要求。 本仓库教你构建围绕这个循环的一切 -- 让 agent 在特定领域高效工作的 harness。 ``` **12 个递进式课程, 从简单循环到隔离化的自治执行。** **每个课程添加一个 harness 机制。每个机制有一句格言。** > **s01**   *"One loop & Bash is all you need"* — 一个工具 + 一个循环 = 一个智能体 > > **s02**   *"加一个工具, 只加一个 handler"* — 循环不用动, 新工具注册进 dispatch map 就行 > > **s03**   *"没有计划的 agent 走哪算哪"* — 先列步骤再动手, 完成率翻倍 > > **s04**   *"大任务拆小, 每个小任务干净的上下文"* — 子智能体用独立 messages[], 不污染主对话 > > **s05**   *"用到什么知识, 临时加载什么知识"* — 通过 tool_result 注入, 不塞 system prompt > > **s06**   *"上下文总会满, 要有办法腾地方"* — 三层压缩策略, 换来无限会话 > > **s07**   *"大目标要拆成小任务, 排好序, 记在磁盘上"* — 文件持久化的任务图, 为多 agent 协作打基础 > > **s08**   *"慢操作丢后台, agent 继续想下一步"* — 后台线程跑命令, 完成后注入通知 > > **s09**   *"任务太大一个人干不完, 要能分给队友"* — 持久化队友 + 异步邮箱 > > **s10**   *"队友之间要有统一的沟通规矩"* — 一个 request-response 模式驱动所有协商 > > **s11**   *"队友自己看看板, 有活就认领"* — 不需要领导逐个分配, 自组织 > > **s12**   *"各干各的目录, 互不干扰"* — 任务管目标, worktree 管目录, 按 ID 绑定 --- ## 核心模式 ```python def agent_loop(messages): while True: response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] for block in response.content: if block.type == "tool_use": output = TOOL_HANDLERS[block.name](**block.input) results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) messages.append({"role": "user", "content": results}) ``` 每个课程在这个循环之上叠加一个 harness 机制 -- 循环本身始终不变。循环属于 agent。机制属于 harness。 ## 范围说明 (重要) 本仓库是一个 0->1 的 harness 工程学习项目 -- 构建围绕 agent 模型的工作环境。 为保证学习路径清晰,仓库有意简化或省略了部分生产机制: - 完整事件 / Hook 总线 (例如 PreToolUse、SessionStart/End、ConfigChange)。 s12 仅提供教学用途的最小 append-only 生命周期事件流。 - 基于规则的权限治理与信任流程 - 会话生命周期控制 (resume/fork) 与更完整的 worktree 生命周期控制 - 完整 MCP 运行时细节 (transport/OAuth/资源订阅/轮询) 仓库中的团队 JSONL 邮箱协议是教学实现,不是对任何特定生产内部实现的声明。 ## 快速开始 ```sh git clone 
https://github.com/shareAI-lab/learn-claude-code cd learn-claude-code pip install -r requirements.txt cp .env.example .env # 编辑 .env 填入你的 ANTHROPIC_API_KEY python agents/s01_agent_loop.py # 从这里开始 python agents/s12_worktree_task_isolation.py # 完整递进终点 python agents/s_full.py # 总纲: 全部机制合一 ``` ### Web 平台 交互式可视化、分步动画、源码查看器, 以及每个课程的文档。 ```sh cd web && npm install && npm run dev # http://localhost:3000 ``` ## 学习路径 ``` 第一阶段: 循环 第二阶段: 规划与知识 ================== ============================== s01 Agent 循环 [1] s03 TodoWrite [5] while + stop_reason TodoManager + nag 提醒 | | +-> s02 Tool Use [4] s04 子智能体 [5] dispatch map: name->handler 每个子智能体独立 messages[] | s05 Skills [5] SKILL.md 通过 tool_result 注入 | s06 Context Compact [5] 三层上下文压缩 第三阶段: 持久化 第四阶段: 团队 ================== ===================== s07 任务系统 [8] s09 智能体团队 [9] 文件持久化 CRUD + 依赖图 队友 + JSONL 邮箱 | | s08 后台任务 [6] s10 团队协议 [12] 守护线程 + 通知队列 关机 + 计划审批 FSM | s11 自治智能体 [14] 空闲轮询 + 自动认领 | s12 Worktree 隔离 [16] 任务协调 + 按需隔离执行通道 [N] = 工具数量 ``` ## 项目结构 ``` learn-claude-code/ | |-- agents/ # Python 参考实现 (s01-s12 + s_full 总纲) |-- docs/{en,zh,ja}/ # 心智模型优先的文档 (3 种语言) |-- web/ # 交互式学习平台 (Next.js) |-- skills/ # s05 的 Skill 文件 +-- .github/workflows/ci.yml # CI: 类型检查 + 构建 ``` ## 文档 心智模型优先: 问题、方案、ASCII 图、最小化代码。 [English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/ja/) | 课程 | 主题 | 格言 | |------|------|------| | [s01](./docs/zh/s01-the-agent-loop.md) | Agent 循环 | *One loop & Bash is all you need* | | [s02](./docs/zh/s02-tool-use.md) | Tool Use | *加一个工具, 只加一个 handler* | | [s03](./docs/zh/s03-todo-write.md) | TodoWrite | *没有计划的 agent 走哪算哪* | | [s04](./docs/zh/s04-subagent.md) | 子智能体 | *大任务拆小, 每个小任务干净的上下文* | | [s05](./docs/zh/s05-skill-loading.md) | Skills | *用到什么知识, 临时加载什么知识* | | [s06](./docs/zh/s06-context-compact.md) | Context Compact | *上下文总会满, 要有办法腾地方* | | [s07](./docs/zh/s07-task-system.md) | 任务系统 | *大目标要拆成小任务, 排好序, 记在磁盘上* | | [s08](./docs/zh/s08-background-tasks.md) | 后台任务 | *慢操作丢后台, agent 继续想下一步* | | [s09](./docs/zh/s09-agent-teams.md) | 智能体团队 
| *任务太大一个人干不完, 要能分给队友* | | [s10](./docs/zh/s10-team-protocols.md) | 团队协议 | *队友之间要有统一的沟通规矩* | | [s11](./docs/zh/s11-autonomous-agents.md) | 自治智能体 | *队友自己看看板, 有活就认领* | | [s12](./docs/zh/s12-worktree-task-isolation.md) | Worktree + 任务隔离 | *各干各的目录, 互不干扰* | ## 学完之后 -- 从理解到落地 12 个课程走完, 你已经从内到外理解了 harness 工程的运作原理。两种方式把知识变成产品: ### Kode Agent CLI -- 开源 Coding Agent CLI > `npm i -g @shareai-lab/kode` 支持 Skill & LSP, 适配 Windows, 可接 GLM / MiniMax / DeepSeek 等开放模型。装完即用。 GitHub: **[shareAI-lab/Kode-cli](https://github.com/shareAI-lab/Kode-cli)** ### Kode Agent SDK -- 把 Agent 能力嵌入你的应用 官方 Claude Code Agent SDK 底层与完整 CLI 进程通信 -- 每个并发用户 = 一个终端进程。Kode SDK 是独立库, 无 per-user 进程开销, 可嵌入后端、浏览器插件、嵌入式设备等任意运行时。 GitHub: **[shareAI-lab/Kode-agent-sdk](https://github.com/shareAI-lab/Kode-agent-sdk)** --- ## 姊妹教程: 从*被动临时会话*到*主动常驻助手* 本仓库教的 harness 属于 **用完即走** 型 -- 开终端、给 agent 任务、做完关掉, 下次重开是全新会话。Claude Code 就是这种模式。 但 [OpenClaw](https://github.com/openclaw/openclaw) 证明了另一种可能: 在同样的 agent core 之上, 加两个 harness 机制就能让 agent 从 "踹一下动一下" 变成 "自己隔 30 秒醒一次找活干": - **心跳 (Heartbeat)** -- 每 30 秒 harness 给 agent 发一条消息, 让它检查有没有事可做。没事就继续睡, 有事立刻行动。 - **定时任务 (Cron)** -- agent 可以给自己安排未来要做的事, 到点自动执行。 再加上 IM 多通道路由 (WhatsApp/Telegram/Slack/Discord 等 13+ 平台)、不清空的上下文记忆、Soul 人格系统, agent 就从一个临时工具变成了始终在线的个人 AI 助手。 **[claw0](https://github.com/shareAI-lab/claw0)** 是我们的姊妹教学仓库, 从零拆解这些 harness 机制: ``` claw agent = agent core + heartbeat + cron + IM chat + memory + soul ``` ``` learn-claude-code claw0 (agent harness 内核: (主动式常驻 harness: 循环、工具、规划、 心跳、定时任务、IM 通道、 团队、worktree 隔离) 记忆、Soul 人格) ``` ## 许可证 MIT --- **模型就是 Agent。代码是 Harness。造好 Harness,Agent 会完成剩下的。** **Bash is all you need. 
Real agents are all the universe needs.** ================================================ FILE: README.md ================================================ [English](./README.md) | [中文](./README-zh.md) | [日本語](./README-ja.md) # Learn Claude Code -- Harness Engineering for Real Agents ## The Model IS the Agent Before we talk about code, let's get one thing absolutely straight. **An agent is a model. Not a framework. Not a prompt chain. Not a drag-and-drop workflow.** ### What an Agent IS An agent is a neural network -- a Transformer, an RNN, a learned function -- that has been trained, through billions of gradient updates on action-sequence data, to perceive an environment, reason about goals, and take actions to achieve them. The word "agent" in AI has always meant this. Always. A human is an agent. A biological neural network, shaped by millions of years of evolutionary training, perceiving the world through senses, reasoning through a brain, acting through a body. When DeepMind, OpenAI, or Anthropic say "agent," they mean the same thing the field has meant since its inception: **a model that has learned to act.** The proof is written in history: - **2013 -- DeepMind DQN plays Atari.** A single neural network, receiving only raw pixels and game scores, learned to play 7 Atari 2600 games -- surpassing all prior algorithms and beating human experts on 3 of them. By 2015, the same architecture scaled to [49 games and matched professional human testers](https://www.nature.com/articles/nature14236), published in *Nature*. No game-specific rules. No decision trees. One model, learning from experience. That model was the agent. - **2019 -- OpenAI Five conquers Dota 2.** Five neural networks, having played [45,000 years of Dota 2](https://openai.com/index/openai-five-defeats-dota-2-world-champions/) against themselves in 10 months, defeated **OG** -- the reigning TI8 world champions -- 2-0 on a San Francisco livestream. 
In a subsequent public arena, the AI won 99.4% of 42,729 games against all comers. No scripted strategies. No meta-programmed team coordination. The models learned teamwork, tactics, and real-time adaptation entirely through self-play. - **2019 -- DeepMind AlphaStar masters StarCraft II.** AlphaStar [beat professional players 10-1](https://deepmind.google/blog/alphastar-mastering-the-real-time-strategy-game-starcraft-ii/) in a closed-door match, and later achieved [Grandmaster status](https://www.nature.com/articles/d41586-019-03298-6) on European servers -- top 0.15% of 90,000 players. A game with imperfect information, real-time decisions, and a combinatorial action space that dwarfs chess and Go. The agent? A model. Trained. Not scripted. - **2019 -- Tencent Juewu dominates Honor of Kings.** Tencent AI Lab's "Juewu" [defeated KPL professional players](https://www.jiemian.com/article/3371171.html) in a full 5v5 match at the World Champion Cup. In 1v1 mode, pros won only [1 out of 15 games and never survived past 8 minutes](https://developer.aliyun.com/article/851058). Training intensity: one day equaled 440 human years. By 2021, Juewu surpassed KPL pros across the full hero pool. No handcrafted matchup tables. No scripted compositions. A model that learned the entire game from scratch through self-play. - **2024-2025 -- LLM agents reshape software engineering.** Claude, GPT, Gemini -- large language models trained on the entirety of human code and reasoning -- are deployed as coding agents. They read codebases, write implementations, debug failures, coordinate in teams. The architecture is identical to every agent before them: a trained model, placed in an environment, given tools to perceive and act. The only difference is the scale of what they've learned and the generality of the tasks they solve. Every one of these milestones shares the same truth: **the "agent" is never the surrounding code.
The agent is always the model.** ### What an Agent Is NOT The word "agent" has been hijacked by an entire cottage industry of prompt plumbing. Drag-and-drop workflow builders. No-code "AI agent" platforms. Prompt-chain orchestration libraries. They all share the same delusion: that wiring together LLM API calls with if-else branches, node graphs, and hardcoded routing logic constitutes "building an agent." It doesn't. What they build is a Rube Goldberg machine -- an over-engineered, brittle pipeline of procedural rules, with an LLM wedged in as a glorified text-completion node. That is not an agent. That is a shell script with delusions of grandeur. **Prompt plumbing "agents" are the fantasy of programmers who don't train models.** They attempt to brute-force intelligence by stacking procedural logic -- massive rule trees, node graphs, chain-of-prompt waterfalls -- and praying that enough glue code will somehow emergently produce autonomous behavior. It won't. You cannot engineer your way to agency. Agency is learned, not programmed. Those systems are dead on arrival: fragile, unscalable, fundamentally incapable of generalization. They are the modern resurrection of GOFAI (Good Old-Fashioned AI) -- the symbolic rule systems the field abandoned decades ago, now spray-painted with an LLM veneer. Different packaging, same dead end. ### The Mind Shift: From "Developing Agents" to Developing Harness When someone says "I'm developing an agent," they can only mean one of two things: **1. Training the model.** Adjusting weights through reinforcement learning, fine-tuning, RLHF, or other gradient-based methods. Collecting task-process data -- the actual sequences of perception, reasoning, and action in real domains -- and using it to shape the model's behavior. This is what DeepMind, OpenAI, Tencent AI Lab, and Anthropic do. This is agent development in the truest sense. **2. Building the harness.** Writing the code that gives the model an environment to operate in. 
This is what most of us do, and it is the focus of this repository. A harness is everything the agent needs to function in a specific domain: ``` Harness = Tools + Knowledge + Observation + Action Interfaces + Permissions Tools: file I/O, shell, network, database, browser Knowledge: product docs, domain references, API specs, style guides Observation: git diff, error logs, browser state, sensor data Action: CLI commands, API calls, UI interactions Permissions: sandboxing, approval workflows, trust boundaries ``` The model decides. The harness executes. The model reasons. The harness provides context. The model is the driver. The harness is the vehicle. **A coding agent's harness is its IDE, terminal, and filesystem access.** A farm agent's harness is its sensor array, irrigation controls, and weather data feeds. A hotel agent's harness is its booking system, guest communication channels, and facility management APIs. The agent -- the intelligence, the decision-maker -- is always the model. The harness changes per domain. The agent generalizes across them. This repo teaches you to build vehicles. Vehicles for coding. But the design patterns generalize to any domain: farm management, hotel operations, manufacturing, logistics, healthcare, education, scientific research. Anywhere a task needs to be perceived, reasoned about, and acted upon -- an agent needs a harness. ### What Harness Engineers Actually Do If you are reading this repository, you are likely a harness engineer -- and that is a powerful thing to be. Here is your real job: - **Implement tools.** Give the agent hands. File read/write, shell execution, API calls, browser control, database queries. Each tool is an action the agent can take in its environment. Design them to be atomic, composable, and well-described. - **Curate knowledge.** Give the agent domain expertise. Product documentation, architectural decision records, style guides, regulatory requirements. Load them on-demand (s05), not upfront. 
The agent should know what's available and pull what it needs. - **Manage context.** Give the agent clean memory. Subagent isolation (s04) prevents noise from leaking. Context compression (s06) prevents history from overwhelming. Task systems (s07) persist goals beyond any single conversation. - **Control permissions.** Give the agent boundaries. Sandbox file access. Require approval for destructive operations. Enforce trust boundaries between the agent and external systems. This is where safety engineering meets harness engineering. - **Collect task-process data.** Every action sequence the agent executes in your harness is training signal. The perception-reasoning-action traces from real deployments are the raw material for fine-tuning the next generation of agent models. Your harness doesn't just serve the agent -- it can help improve the agent. You are not writing the intelligence. You are building the world the intelligence inhabits. The quality of that world -- how clearly the agent can perceive, how precisely it can act, how rich its available knowledge is -- directly determines how effectively the intelligence can express itself. **Build great harnesses. The agent will do the rest.** ### Why Claude Code -- A Masterclass in Harness Engineering Why does this repository dissect Claude Code specifically? Because Claude Code is the most elegant and fully-realized agent harness we have seen. Not because of any single clever trick, but because of what it *doesn't* do: it doesn't try to be the agent. It doesn't impose rigid workflows. It doesn't second-guess the model with elaborate decision trees. It provides the model with tools, knowledge, context management, and permission boundaries -- then gets out of the way. Look at what Claude Code actually is, stripped to its essence: ``` Claude Code = one agent loop + tools (bash, read, write, edit, glob, grep, browser...) 
+ on-demand skill loading + context compression + subagent spawning + task system with dependency graph + team coordination with async mailboxes + worktree isolation for parallel execution + permission governance ``` That's it. That's the entire architecture. Every component is a harness mechanism -- a piece of the world built for the agent to inhabit. The agent itself? It's Claude. A model. Trained by Anthropic on the full breadth of human reasoning and code. The harness doesn't make Claude smart. Claude is already smart. The harness gives Claude hands, eyes, and a workspace. This is why Claude Code is the ideal teaching subject: **it demonstrates what happens when you trust the model and focus your engineering on the harness.** Every session in this repository (s01-s12) reverse-engineers one harness mechanism from Claude Code's architecture. By the end, you understand not just how Claude Code works, but the universal principles of harness engineering that apply to any agent in any domain. The lesson is not "copy Claude Code." The lesson is: **the best agent products are built by engineers who understand that their job is harness, not intelligence.** --- ## The Vision: Fill the Universe with Real Agents This is not just about coding agents. Every domain where humans perform complex, multi-step, judgment-intensive work is a domain where agents can operate -- given the right harness. The patterns in this repository are universal: ``` Estate management agent = model + property sensors + maintenance tools + tenant comms Agricultural agent = model + soil/weather data + irrigation controls + crop knowledge Hotel operations agent = model + booking system + guest channels + facility APIs Medical research agent = model + literature search + lab instruments + protocol docs Manufacturing agent = model + production line sensors + quality controls + logistics Education agent = model + curriculum knowledge + student progress + assessment tools ``` The loop is always the same. 
The tools change. The knowledge changes. The permissions change. The agent -- the model -- generalizes. Every harness engineer reading this repository is learning patterns that apply far beyond software engineering. You are learning to build the infrastructure for an intelligent, automated future. Every well-designed harness deployed in a real domain is one more place where an agent can perceive, reason, and act. First we fill the workshops. Then the farms, the hospitals, the factories. Then the cities. Then the planet. **Bash is all you need. Real agents are all the universe needs.** --- ``` THE AGENT PATTERN ================= User --> messages[] --> LLM --> response | stop_reason == "tool_use"? / \ yes no | | execute tools return text append results loop back -----------------> messages[] That's the minimal loop. Every AI agent needs this loop. The MODEL decides when to call tools and when to stop. The CODE just executes what the model asks for. This repo teaches you to build what surrounds this loop -- the harness that makes the agent effective in a specific domain. ``` **12 progressive sessions, from a simple loop to isolated autonomous execution.** **Each session adds one harness mechanism. 
Each mechanism has one motto.** > **s01**   *"One loop & Bash is all you need"* — one tool + one loop = an agent > > **s02**   *"Adding a tool means adding one handler"* — the loop stays the same; new tools register into the dispatch map > > **s03**   *"An agent without a plan drifts"* — list the steps first, then execute; completion doubles > > **s04**   *"Break big tasks down; each subtask gets a clean context"* — subagents use independent messages[], keeping the main conversation clean > > **s05**   *"Load knowledge when you need it, not upfront"* — inject via tool_result, not the system prompt > > **s06**   *"Context will fill up; you need a way to make room"* — three-layer compression strategy for infinite sessions > > **s07**   *"Break big goals into small tasks, order them, persist to disk"* — a file-based task graph with dependencies, laying the foundation for multi-agent collaboration > > **s08**   *"Run slow operations in the background; the agent keeps thinking"* — daemon threads run commands, inject notifications on completion > > **s09**   *"When the task is too big for one, delegate to teammates"* — persistent teammates + async mailboxes > > **s10**   *"Teammates need shared communication rules"* — one request-response pattern drives all negotiation > > **s11**   *"Teammates scan the board and claim tasks themselves"* — no need for the lead to assign each one > > **s12**   *"Each works in its own directory, no interference"* — tasks manage goals, worktrees manage directories, bound by ID --- ## The Core Pattern ```python def agent_loop(messages): while True: response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] for block in response.content: if block.type == "tool_use": output = TOOL_HANDLERS[block.name](**block.input) results.append({ "type": "tool_result", "tool_use_id": block.id, 
"content": output, }) messages.append({"role": "user", "content": results}) ``` Every session layers one harness mechanism on top of this loop -- without changing the loop itself. The loop belongs to the agent. The mechanisms belong to the harness. ## Scope (Important) This repository is a 0->1 learning project for harness engineering -- building the environment that surrounds an agent model. It intentionally simplifies or omits several production mechanisms: - Full event/hook buses (for example PreToolUse, SessionStart/End, ConfigChange). s12 includes only a minimal append-only lifecycle event stream for teaching. - Rule-based permission governance and trust workflows - Session lifecycle controls (resume/fork) and advanced worktree lifecycle controls - Full MCP runtime details (transport/OAuth/resource subscribe/polling) Treat the team JSONL mailbox protocol in this repo as a teaching implementation, not a claim about any specific production internals. ## Quick Start ```sh git clone https://github.com/shareAI-lab/learn-claude-code cd learn-claude-code pip install -r requirements.txt cp .env.example .env # Edit .env with your ANTHROPIC_API_KEY python agents/s01_agent_loop.py # Start here python agents/s12_worktree_task_isolation.py # Full progression endpoint python agents/s_full.py # Capstone: all mechanisms combined ``` ### Web Platform Interactive visualizations, step-through diagrams, source viewer, and documentation. 
```sh cd web && npm install && npm run dev # http://localhost:3000 ``` ## Learning Path ``` Phase 1: THE LOOP Phase 2: PLANNING & KNOWLEDGE ================== ============================== s01 The Agent Loop [1] s03 TodoWrite [5] while + stop_reason TodoManager + nag reminder | | +-> s02 Tool Use [4] s04 Subagents [5] dispatch map: name->handler fresh messages[] per child | s05 Skills [5] SKILL.md via tool_result | s06 Context Compact [5] 3-layer compression Phase 3: PERSISTENCE Phase 4: TEAMS ================== ===================== s07 Tasks [8] s09 Agent Teams [9] file-based CRUD + deps graph teammates + JSONL mailboxes | | s08 Background Tasks [6] s10 Team Protocols [12] daemon threads + notify queue shutdown + plan approval FSM | s11 Autonomous Agents [14] idle cycle + auto-claim | s12 Worktree Isolation [16] task coordination + optional isolated execution lanes [N] = number of tools ``` ## Architecture ``` learn-claude-code/ | |-- agents/ # Python reference implementations (s01-s12 + s_full capstone) |-- docs/{en,zh,ja}/ # Mental-model-first documentation (3 languages) |-- web/ # Interactive learning platform (Next.js) |-- skills/ # Skill files for s05 +-- .github/workflows/ci.yml # CI: typecheck + build ``` ## Documentation Mental-model-first: problem, solution, ASCII diagram, minimal code. Available in [English](./docs/en/) | [中文](./docs/zh/) | [日本語](./docs/ja/). 
| Session | Topic | Motto | |---------|-------|-------| | [s01](./docs/en/s01-the-agent-loop.md) | The Agent Loop | *One loop & Bash is all you need* | | [s02](./docs/en/s02-tool-use.md) | Tool Use | *Adding a tool means adding one handler* | | [s03](./docs/en/s03-todo-write.md) | TodoWrite | *An agent without a plan drifts* | | [s04](./docs/en/s04-subagent.md) | Subagents | *Break big tasks down; each subtask gets a clean context* | | [s05](./docs/en/s05-skill-loading.md) | Skills | *Load knowledge when you need it, not upfront* | | [s06](./docs/en/s06-context-compact.md) | Context Compact | *Context will fill up; you need a way to make room* | | [s07](./docs/en/s07-task-system.md) | Tasks | *Break big goals into small tasks, order them, persist to disk* | | [s08](./docs/en/s08-background-tasks.md) | Background Tasks | *Run slow operations in the background; the agent keeps thinking* | | [s09](./docs/en/s09-agent-teams.md) | Agent Teams | *When the task is too big for one, delegate to teammates* | | [s10](./docs/en/s10-team-protocols.md) | Team Protocols | *Teammates need shared communication rules* | | [s11](./docs/en/s11-autonomous-agents.md) | Autonomous Agents | *Teammates scan the board and claim tasks themselves* | | [s12](./docs/en/s12-worktree-task-isolation.md) | Worktree + Task Isolation | *Each works in its own directory, no interference* | ## What's Next -- from understanding to shipping After the 12 sessions you understand how harness engineering works inside out. Two ways to put that knowledge to work: ### Kode Agent CLI -- Open-Source Coding Agent CLI > `npm i -g @shareai-lab/kode` Skill & LSP support, Windows-ready, pluggable with GLM / MiniMax / DeepSeek and other open models. Install and go. 
GitHub: **[shareAI-lab/Kode-cli](https://github.com/shareAI-lab/Kode-cli)** ### Kode Agent SDK -- Embed Agent Capabilities in Your App The official Claude Code Agent SDK communicates with a full CLI process under the hood -- each concurrent user means a separate terminal process. Kode SDK is a standalone library with no per-user process overhead, embeddable in backends, browser extensions, embedded devices, or any runtime. GitHub: **[shareAI-lab/Kode-agent-sdk](https://github.com/shareAI-lab/Kode-agent-sdk)** --- ## Sister Repo: from *on-demand sessions* to *always-on assistant* The harness this repo teaches is **use-and-discard** -- open a terminal, give the agent a task, close when done, next session starts blank. That is the Claude Code model. [OpenClaw](https://github.com/openclaw/openclaw) proved another possibility: on top of the same agent core, two harness mechanisms turn the agent from "poke it to make it move" into "it wakes up every 30 seconds to look for work": - **Heartbeat** -- every 30s the harness sends the agent a message to check if there is anything to do. Nothing? Go back to sleep. Something? Act immediately. - **Cron** -- the agent can schedule its own future tasks, executed automatically when the time comes. Add multi-channel IM routing (WhatsApp / Telegram / Slack / Discord, 13+ platforms), persistent context memory, and a Soul personality system, and the agent goes from a disposable tool to an always-on personal AI assistant. **[claw0](https://github.com/shareAI-lab/claw0)** is our companion teaching repo that deconstructs these harness mechanisms from scratch: ``` claw agent = agent core + heartbeat + cron + IM chat + memory + soul ``` ``` learn-claude-code claw0 (agent harness core: (proactive always-on harness: loop, tools, planning, heartbeat, cron, IM channels, teams, worktree isolation) memory, soul personality) ``` ## About
Scan with WeChat to follow us, or follow on X: [shareAI-Lab](https://x.com/baicai003) ## License MIT --- **The model is the agent. The code is the harness. Build great harnesses. The agent will do the rest.** **Bash is all you need. Real agents are all the universe needs.** ================================================ FILE: agents/__init__.py ================================================ # agents/ - Harness implementations (s01-s12) + full reference (s_full) # Each file is self-contained and runnable: python agents/s01_agent_loop.py # The model is the agent. These files are the harness. ================================================ FILE: agents/s01_agent_loop.py ================================================ #!/usr/bin/env python3 # Harness: the loop -- the model's first connection to the real world. """ s01_agent_loop.py - The Agent Loop The entire secret of an AI coding agent in one pattern: while stop_reason == "tool_use": response = LLM(messages, tools) execute tools append results +----------+ +-------+ +---------+ | User | ---> | LLM | ---> | Tool | | prompt | | | | execute | +----------+ +---+---+ +----+----+ ^ | | tool_result | +---------------+ (loop continues) This is the core loop: feed tool results back to the model until the model decides to stop. Production agents layer policy, hooks, and lifecycle controls on top. """ import os import subprocess from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] SYSTEM = f"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. Act, don't explain."
def run_bash(command: str) -> str:
    """Run ``command`` in a shell from the current directory and return its output.

    A small deny-list rejects obviously destructive commands before anything
    is executed. Matching is done on whole words and on real redirect targets,
    so harmless commands such as ``cat pseudo.txt`` or ``make 2> /dev/null``
    are not refused by accident, while ``>/dev/sda`` (no space) is caught.

    Args:
        command: Shell command line, executed with ``shell=True``.

    Returns:
        Combined stdout + stderr (stripped, capped at 50,000 characters),
        ``"(no output)"`` when the command printed nothing, or an
        ``"Error: ..."`` string when the command was blocked, timed out
        after 120 seconds, or could not be started.
    """
    import re  # local import: only the deny-list needs it

    deny_patterns = (
        r"rm -rf /",                               # recursive delete from an absolute path
        r"\b(?:sudo|shutdown|reboot)\b",           # whole words only ("pseudo" is fine)
        r">\s*/dev/(?!(?:null|stdout|stderr)\b)",  # redirect into a device node
    )
    if any(re.search(pattern, command) for pattern in deny_patterns):
        return "Error: Dangerous command blocked"
    try:
        r = subprocess.run(command, shell=True, cwd=os.getcwd(),
                           capture_output=True, text=True,
                           errors="replace",  # binary output must not raise UnicodeDecodeError
                           timeout=120)
    except subprocess.TimeoutExpired:
        return "Error: Timeout (120s)"
    except OSError as e:
        # e.g. the working directory vanished or the shell could not be spawned
        return f"Error: {e}"
    out = (r.stdout + r.stderr).strip()
    return out[:50000] if out else "(no output)"
def run_bash(command: str) -> str:
    """Run ``command`` in a shell inside ``WORKDIR`` and return its output.

    A small deny-list rejects obviously destructive commands before anything
    is executed. Matching is done on whole words and on real redirect targets,
    so harmless commands such as ``cat pseudo.txt`` or ``make 2> /dev/null``
    are not refused by accident, while ``>/dev/sda`` (no space) is caught.

    Args:
        command: Shell command line, executed with ``shell=True``.

    Returns:
        Combined stdout + stderr (stripped, capped at 50,000 characters),
        ``"(no output)"`` when the command printed nothing, or an
        ``"Error: ..."`` string when the command was blocked, timed out
        after 120 seconds, or could not be started.
    """
    import re  # local import: only the deny-list needs it

    deny_patterns = (
        r"rm -rf /",                               # recursive delete from an absolute path
        r"\b(?:sudo|shutdown|reboot)\b",           # whole words only ("pseudo" is fine)
        r">\s*/dev/(?!(?:null|stdout|stderr)\b)",  # redirect into a device node
    )
    if any(re.search(pattern, command) for pattern in deny_patterns):
        return "Error: Dangerous command blocked"
    try:
        r = subprocess.run(command, shell=True, cwd=WORKDIR,
                           capture_output=True, text=True,
                           errors="replace",  # binary output must not raise UnicodeDecodeError
                           timeout=120)
    except subprocess.TimeoutExpired:
        return "Error: Timeout (120s)"
    except OSError as e:
        # e.g. WORKDIR vanished or the shell could not be spawned
        return f"Error: {e}"
    out = (r.stdout + r.stderr).strip()
    return out[:50000] if out else "(no output)"
def agent_loop(messages: list):
    """Drive the model/tool loop until the model stops asking for tools.

    Each round sends ``messages`` to the model, appends the assistant turn,
    executes every ``tool_use`` block through ``TOOL_HANDLERS`` and appends
    the collected ``tool_result`` blocks as the next user turn.

    Args:
        messages: Conversation history; mutated in place.

    Returns:
        None. Returns as soon as ``stop_reason`` is anything but ``"tool_use"``;
        the final assistant turn is then the last element of ``messages``.
    """
    while True:
        response = client.messages.create(
            model=MODEL, system=SYSTEM, messages=messages,
            tools=TOOLS, max_tokens=8000,
        )
        messages.append({"role": "assistant", "content": response.content})
        if response.stop_reason != "tool_use":
            return
        results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            handler = TOOL_HANDLERS.get(block.name)
            if handler is None:
                output = f"Unknown tool: {block.name}"
            else:
                try:
                    output = handler(**block.input)
                except Exception as e:
                    # A malformed call (e.g. a missing required argument) must be
                    # reported back to the model, not crash the session: every
                    # tool_use needs a matching tool_result in the next turn.
                    output = f"Error: {type(e).__name__}: {e}"
            print(f"> {block.name}: {output[:200]}")
            results.append({"type": "tool_result", "tool_use_id": block.id, "content": output})
        messages.append({"role": "user", "content": results})
""" import os import subprocess from pathlib import Path from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] SYSTEM = f"""You are a coding agent at {WORKDIR}. Use the todo tool to plan multi-step tasks. Mark in_progress before starting, completed when done. Prefer tools over prose.""" # -- TodoManager: structured state the LLM writes to -- class TodoManager: def __init__(self): self.items = [] def update(self, items: list) -> str: if len(items) > 20: raise ValueError("Max 20 todos allowed") validated = [] in_progress_count = 0 for i, item in enumerate(items): text = str(item.get("text", "")).strip() status = str(item.get("status", "pending")).lower() item_id = str(item.get("id", str(i + 1))) if not text: raise ValueError(f"Item {item_id}: text required") if status not in ("pending", "in_progress", "completed"): raise ValueError(f"Item {item_id}: invalid status '{status}'") if status == "in_progress": in_progress_count += 1 validated.append({"id": item_id, "text": text, "status": status}) if in_progress_count > 1: raise ValueError("Only one task can be in_progress at a time") self.items = validated return self.render() def render(self) -> str: if not self.items: return "No todos." 
lines = [] for item in self.items: marker = {"pending": "[ ]", "in_progress": "[>]", "completed": "[x]"}[item["status"]] lines.append(f"{marker} #{item['id']}: {item['text']}") done = sum(1 for t in self.items if t["status"] == "completed") lines.append(f"\n({done}/{len(self.items)} completed)") return "\n".join(lines) TODO = TodoManager() # -- Tool implementations -- def safe_path(p: str) -> Path: path = (WORKDIR / p).resolve() if not path.is_relative_to(WORKDIR): raise ValueError(f"Path escapes workspace: {p}") return path def run_bash(command: str) -> str: dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] if any(d in command for d in dangerous): return "Error: Dangerous command blocked" try: r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120) out = (r.stdout + r.stderr).strip() return out[:50000] if out else "(no output)" except subprocess.TimeoutExpired: return "Error: Timeout (120s)" def run_read(path: str, limit: int = None) -> str: try: lines = safe_path(path).read_text().splitlines() if limit and limit < len(lines): lines = lines[:limit] + [f"... 
({len(lines) - limit} more)"] return "\n".join(lines)[:50000] except Exception as e: return f"Error: {e}" def run_write(path: str, content: str) -> str: try: fp = safe_path(path) fp.parent.mkdir(parents=True, exist_ok=True) fp.write_text(content) return f"Wrote {len(content)} bytes" except Exception as e: return f"Error: {e}" def run_edit(path: str, old_text: str, new_text: str) -> str: try: fp = safe_path(path) content = fp.read_text() if old_text not in content: return f"Error: Text not found in {path}" fp.write_text(content.replace(old_text, new_text, 1)) return f"Edited {path}" except Exception as e: return f"Error: {e}" TOOL_HANDLERS = { "bash": lambda **kw: run_bash(kw["command"]), "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), "write_file": lambda **kw: run_write(kw["path"], kw["content"]), "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), "todo": lambda **kw: TODO.update(kw["items"]), } TOOLS = [ {"name": "bash", "description": "Run a shell command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, {"name": "read_file", "description": "Read file contents.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, {"name": "write_file", "description": "Write content to file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, {"name": "edit_file", "description": "Replace exact text in file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, {"name": "todo", "description": "Update task list. 
Track progress on multi-step tasks.", "input_schema": {"type": "object", "properties": {"items": {"type": "array", "items": {"type": "object", "properties": {"id": {"type": "string"}, "text": {"type": "string"}, "status": {"type": "string", "enum": ["pending", "in_progress", "completed"]}}, "required": ["id", "text", "status"]}}}, "required": ["items"]}}, ] # -- Agent loop with nag reminder injection -- def agent_loop(messages: list): rounds_since_todo = 0 while True: # Nag reminder is injected below, alongside tool results response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] used_todo = False for block in response.content: if block.type == "tool_use": handler = TOOL_HANDLERS.get(block.name) try: output = handler(**block.input) if handler else f"Unknown tool: {block.name}" except Exception as e: output = f"Error: {e}" print(f"> {block.name}: {str(output)[:200]}") results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) if block.name == "todo": used_todo = True rounds_since_todo = 0 if used_todo else rounds_since_todo + 1 if rounds_since_todo >= 3: results.insert(0, {"type": "text", "text": "Update your todos."}) messages.append({"role": "user", "content": results}) if __name__ == "__main__": history = [] while True: try: query = input("\033[36ms03 >> \033[0m") except (EOFError, KeyboardInterrupt): break if query.strip().lower() in ("q", "exit", ""): break history.append({"role": "user", "content": query}) agent_loop(history) response_content = history[-1]["content"] if isinstance(response_content, list): for block in response_content: if hasattr(block, "text"): print(block.text) print() ================================================ FILE: agents/s04_subagent.py ================================================ #!/usr/bin/env python3 # Harness: 
context isolation -- protecting the model's clarity of thought. """ s04_subagent.py - Subagents Spawn a child agent with fresh messages=[]. The child works in its own context, sharing the filesystem, then returns only a summary to the parent. Parent agent Subagent +------------------+ +------------------+ | messages=[...] | | messages=[] | <-- fresh | | dispatch | | | tool: task | ---------->| while tool_use: | | prompt="..." | | call tools | | description="" | | append results | | | summary | | | result = "..." | <--------- | return last text | +------------------+ +------------------+ | Parent context stays clean. Subagent context is discarded. Key insight: "Process isolation gives context isolation for free." """ import os import subprocess from pathlib import Path from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] SYSTEM = f"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks." SUBAGENT_SYSTEM = f"You are a coding subagent at {WORKDIR}. Complete the given task, then summarize your findings." 
# -- Tool implementations shared by parent and child -- def safe_path(p: str) -> Path: path = (WORKDIR / p).resolve() if not path.is_relative_to(WORKDIR): raise ValueError(f"Path escapes workspace: {p}") return path def run_bash(command: str) -> str: dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] if any(d in command for d in dangerous): return "Error: Dangerous command blocked" try: r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120) out = (r.stdout + r.stderr).strip() return out[:50000] if out else "(no output)" except subprocess.TimeoutExpired: return "Error: Timeout (120s)" def run_read(path: str, limit: int = None) -> str: try: lines = safe_path(path).read_text().splitlines() if limit and limit < len(lines): lines = lines[:limit] + [f"... ({len(lines) - limit} more)"] return "\n".join(lines)[:50000] except Exception as e: return f"Error: {e}" def run_write(path: str, content: str) -> str: try: fp = safe_path(path) fp.parent.mkdir(parents=True, exist_ok=True) fp.write_text(content) return f"Wrote {len(content)} bytes" except Exception as e: return f"Error: {e}" def run_edit(path: str, old_text: str, new_text: str) -> str: try: fp = safe_path(path) content = fp.read_text() if old_text not in content: return f"Error: Text not found in {path}" fp.write_text(content.replace(old_text, new_text, 1)) return f"Edited {path}" except Exception as e: return f"Error: {e}" TOOL_HANDLERS = { "bash": lambda **kw: run_bash(kw["command"]), "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), "write_file": lambda **kw: run_write(kw["path"], kw["content"]), "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), } # Child gets all base tools except task (no recursive spawning) CHILD_TOOLS = [ {"name": "bash", "description": "Run a shell command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, {"name": "read_file", 
"description": "Read file contents.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, {"name": "write_file", "description": "Write content to file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, {"name": "edit_file", "description": "Replace exact text in file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, ] # -- Subagent: fresh context, filtered tools, summary-only return -- def run_subagent(prompt: str) -> str: sub_messages = [{"role": "user", "content": prompt}] # fresh context for _ in range(30): # safety limit response = client.messages.create( model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages, tools=CHILD_TOOLS, max_tokens=8000, ) sub_messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": break results = [] for block in response.content: if block.type == "tool_use": handler = TOOL_HANDLERS.get(block.name) output = handler(**block.input) if handler else f"Unknown tool: {block.name}" results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)[:50000]}) sub_messages.append({"role": "user", "content": results}) # Only the final text returns to the parent -- child context is discarded return "".join(b.text for b in response.content if hasattr(b, "text")) or "(no summary)" # -- Parent tools: base tools + task dispatcher -- PARENT_TOOLS = CHILD_TOOLS + [ {"name": "task", "description": "Spawn a subagent with fresh context. 
It shares the filesystem but not conversation history.", "input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}, "description": {"type": "string", "description": "Short description of the task"}}, "required": ["prompt"]}}, ] def agent_loop(messages: list): while True: response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=PARENT_TOOLS, max_tokens=8000, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] for block in response.content: if block.type == "tool_use": if block.name == "task": desc = block.input.get("description", "subtask") print(f"> task ({desc}): {block.input['prompt'][:80]}") output = run_subagent(block.input["prompt"]) else: handler = TOOL_HANDLERS.get(block.name) output = handler(**block.input) if handler else f"Unknown tool: {block.name}" print(f" {str(output)[:200]}") results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) messages.append({"role": "user", "content": results}) if __name__ == "__main__": history = [] while True: try: query = input("\033[36ms04 >> \033[0m") except (EOFError, KeyboardInterrupt): break if query.strip().lower() in ("q", "exit", ""): break history.append({"role": "user", "content": query}) agent_loop(history) response_content = history[-1]["content"] if isinstance(response_content, list): for block in response_content: if hasattr(block, "text"): print(block.text) print() ================================================ FILE: agents/s05_skill_loading.py ================================================ #!/usr/bin/env python3 # Harness: on-demand knowledge -- domain expertise, loaded when the model asks. 
""" s05_skill_loading.py - Skills Two-layer skill injection that avoids bloating the system prompt: Layer 1 (cheap): skill names in system prompt (~100 tokens/skill) Layer 2 (on demand): full skill body in tool_result skills/ pdf/ SKILL.md <-- frontmatter (name, description) + body code-review/ SKILL.md System prompt: +--------------------------------------+ | You are a coding agent. | | Skills available: | | - pdf: Process PDF files... | <-- Layer 1: metadata only | - code-review: Review code... | +--------------------------------------+ When model calls load_skill("pdf"): +--------------------------------------+ | tool_result: | | | | Full PDF processing instructions | <-- Layer 2: full body | Step 1: ... | | Step 2: ... | | | +--------------------------------------+ Key insight: "Don't put everything in the system prompt. Load on demand." """ import os import re import subprocess from pathlib import Path from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] SKILLS_DIR = WORKDIR / "skills" # -- SkillLoader: scan skills//SKILL.md with YAML frontmatter -- class SkillLoader: def __init__(self, skills_dir: Path): self.skills_dir = skills_dir self.skills = {} self._load_all() def _load_all(self): if not self.skills_dir.exists(): return for f in sorted(self.skills_dir.rglob("SKILL.md")): text = f.read_text() meta, body = self._parse_frontmatter(text) name = meta.get("name", f.parent.name) self.skills[name] = {"meta": meta, "body": body, "path": str(f)} def _parse_frontmatter(self, text: str) -> tuple: """Parse YAML frontmatter between --- delimiters.""" match = re.match(r"^---\n(.*?)\n---\n(.*)", text, re.DOTALL) if not match: return {}, text meta = {} for line in match.group(1).strip().splitlines(): if ":" in line: key, val = line.split(":", 
1) meta[key.strip()] = val.strip() return meta, match.group(2).strip() def get_descriptions(self) -> str: """Layer 1: short descriptions for the system prompt.""" if not self.skills: return "(no skills available)" lines = [] for name, skill in self.skills.items(): desc = skill["meta"].get("description", "No description") tags = skill["meta"].get("tags", "") line = f" - {name}: {desc}" if tags: line += f" [{tags}]" lines.append(line) return "\n".join(lines) def get_content(self, name: str) -> str: """Layer 2: full skill body returned in tool_result.""" skill = self.skills.get(name) if not skill: return f"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}" return f"\n{skill['body']}\n" SKILL_LOADER = SkillLoader(SKILLS_DIR) # Layer 1: skill metadata injected into system prompt SYSTEM = f"""You are a coding agent at {WORKDIR}. Use load_skill to access specialized knowledge before tackling unfamiliar topics. Skills available: {SKILL_LOADER.get_descriptions()}""" # -- Tool implementations -- def safe_path(p: str) -> Path: path = (WORKDIR / p).resolve() if not path.is_relative_to(WORKDIR): raise ValueError(f"Path escapes workspace: {p}") return path def run_bash(command: str) -> str: dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] if any(d in command for d in dangerous): return "Error: Dangerous command blocked" try: r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120) out = (r.stdout + r.stderr).strip() return out[:50000] if out else "(no output)" except subprocess.TimeoutExpired: return "Error: Timeout (120s)" def run_read(path: str, limit: int = None) -> str: try: lines = safe_path(path).read_text().splitlines() if limit and limit < len(lines): lines = lines[:limit] + [f"... 
({len(lines) - limit} more)"] return "\n".join(lines)[:50000] except Exception as e: return f"Error: {e}" def run_write(path: str, content: str) -> str: try: fp = safe_path(path) fp.parent.mkdir(parents=True, exist_ok=True) fp.write_text(content) return f"Wrote {len(content)} bytes" except Exception as e: return f"Error: {e}" def run_edit(path: str, old_text: str, new_text: str) -> str: try: fp = safe_path(path) content = fp.read_text() if old_text not in content: return f"Error: Text not found in {path}" fp.write_text(content.replace(old_text, new_text, 1)) return f"Edited {path}" except Exception as e: return f"Error: {e}" TOOL_HANDLERS = { "bash": lambda **kw: run_bash(kw["command"]), "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), "write_file": lambda **kw: run_write(kw["path"], kw["content"]), "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), "load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]), } TOOLS = [ {"name": "bash", "description": "Run a shell command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, {"name": "read_file", "description": "Read file contents.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, {"name": "write_file", "description": "Write content to file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, {"name": "edit_file", "description": "Replace exact text in file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, {"name": "load_skill", "description": "Load specialized knowledge by name.", "input_schema": {"type": "object", "properties": {"name": {"type": "string", "description": "Skill name to 
load"}}, "required": ["name"]}}, ] def agent_loop(messages: list): while True: response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] for block in response.content: if block.type == "tool_use": handler = TOOL_HANDLERS.get(block.name) try: output = handler(**block.input) if handler else f"Unknown tool: {block.name}" except Exception as e: output = f"Error: {e}" print(f"> {block.name}: {str(output)[:200]}") results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) messages.append({"role": "user", "content": results}) if __name__ == "__main__": history = [] while True: try: query = input("\033[36ms05 >> \033[0m") except (EOFError, KeyboardInterrupt): break if query.strip().lower() in ("q", "exit", ""): break history.append({"role": "user", "content": query}) agent_loop(history) response_content = history[-1]["content"] if isinstance(response_content, list): for block in response_content: if hasattr(block, "text"): print(block.text) print() ================================================ FILE: agents/s06_context_compact.py ================================================ #!/usr/bin/env python3 # Harness: compression -- clean memory for infinite sessions. """ s06_context_compact.py - Compact Three-layer compression pipeline so the agent can work forever: Every turn: +------------------+ | Tool call result | +------------------+ | v [Layer 1: micro_compact] (silent, every turn) Replace tool_result content older than last 3 with "[Previous: used {tool_name}]" | v [Check: tokens > 50000?] | | no yes | | v v continue [Layer 2: auto_compact] Save full transcript to .transcripts/ Ask LLM to summarize conversation. Replace all messages with [summary]. | v [Layer 3: compact tool] Model calls compact -> immediate summarization. Same as auto, triggered manually. 
Key insight: "The agent can forget strategically and keep working forever." """ import json import os import subprocess import time from pathlib import Path from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] SYSTEM = f"You are a coding agent at {WORKDIR}. Use tools to solve tasks." THRESHOLD = 50000 TRANSCRIPT_DIR = WORKDIR / ".transcripts" KEEP_RECENT = 3 def estimate_tokens(messages: list) -> int: """Rough token count: ~4 chars per token.""" return len(str(messages)) // 4 # -- Layer 1: micro_compact - replace old tool results with placeholders -- def micro_compact(messages: list) -> list: # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries tool_results = [] for msg_idx, msg in enumerate(messages): if msg["role"] == "user" and isinstance(msg.get("content"), list): for part_idx, part in enumerate(msg["content"]): if isinstance(part, dict) and part.get("type") == "tool_result": tool_results.append((msg_idx, part_idx, part)) if len(tool_results) <= KEEP_RECENT: return messages # Find tool_name for each result by matching tool_use_id in prior assistant messages tool_name_map = {} for msg in messages: if msg["role"] == "assistant": content = msg.get("content", []) if isinstance(content, list): for block in content: if hasattr(block, "type") and block.type == "tool_use": tool_name_map[block.id] = block.name # Clear old results (keep last KEEP_RECENT) to_clear = tool_results[:-KEEP_RECENT] for _, _, result in to_clear: if isinstance(result.get("content"), str) and len(result["content"]) > 100: tool_id = result.get("tool_use_id", "") tool_name = tool_name_map.get(tool_id, "unknown") result["content"] = f"[Previous: used {tool_name}]" return messages # -- Layer 2: auto_compact - save transcript, summarize, replace 
messages -- def auto_compact(messages: list) -> list: # Save full transcript to disk TRANSCRIPT_DIR.mkdir(exist_ok=True) transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl" with open(transcript_path, "w") as f: for msg in messages: f.write(json.dumps(msg, default=str) + "\n") print(f"[transcript saved: {transcript_path}]") # Ask LLM to summarize conversation_text = json.dumps(messages, default=str)[:80000] response = client.messages.create( model=MODEL, messages=[{"role": "user", "content": "Summarize this conversation for continuity. Include: " "1) What was accomplished, 2) Current state, 3) Key decisions made. " "Be concise but preserve critical details.\n\n" + conversation_text}], max_tokens=2000, ) summary = response.content[0].text # Replace all messages with compressed summary return [ {"role": "user", "content": f"[Conversation compressed. Transcript: {transcript_path}]\n\n{summary}"}, {"role": "assistant", "content": "Understood. I have the context from the summary. Continuing."}, ] # -- Tool implementations -- def safe_path(p: str) -> Path: path = (WORKDIR / p).resolve() if not path.is_relative_to(WORKDIR): raise ValueError(f"Path escapes workspace: {p}") return path def run_bash(command: str) -> str: dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] if any(d in command for d in dangerous): return "Error: Dangerous command blocked" try: r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120) out = (r.stdout + r.stderr).strip() return out[:50000] if out else "(no output)" except subprocess.TimeoutExpired: return "Error: Timeout (120s)" def run_read(path: str, limit: int = None) -> str: try: lines = safe_path(path).read_text().splitlines() if limit and limit < len(lines): lines = lines[:limit] + [f"... 
({len(lines) - limit} more)"] return "\n".join(lines)[:50000] except Exception as e: return f"Error: {e}" def run_write(path: str, content: str) -> str: try: fp = safe_path(path) fp.parent.mkdir(parents=True, exist_ok=True) fp.write_text(content) return f"Wrote {len(content)} bytes" except Exception as e: return f"Error: {e}" def run_edit(path: str, old_text: str, new_text: str) -> str: try: fp = safe_path(path) content = fp.read_text() if old_text not in content: return f"Error: Text not found in {path}" fp.write_text(content.replace(old_text, new_text, 1)) return f"Edited {path}" except Exception as e: return f"Error: {e}" TOOL_HANDLERS = { "bash": lambda **kw: run_bash(kw["command"]), "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), "write_file": lambda **kw: run_write(kw["path"], kw["content"]), "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), "compact": lambda **kw: "Manual compression requested.", } TOOLS = [ {"name": "bash", "description": "Run a shell command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, {"name": "read_file", "description": "Read file contents.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, {"name": "write_file", "description": "Write content to file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, {"name": "edit_file", "description": "Replace exact text in file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, {"name": "compact", "description": "Trigger manual conversation compression.", "input_schema": {"type": "object", "properties": {"focus": {"type": "string", "description": "What to preserve in the 
summary"}}}}, ] def agent_loop(messages: list): while True: # Layer 1: micro_compact before each LLM call micro_compact(messages) # Layer 2: auto_compact if token estimate exceeds threshold if estimate_tokens(messages) > THRESHOLD: print("[auto_compact triggered]") messages[:] = auto_compact(messages) response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] manual_compact = False for block in response.content: if block.type == "tool_use": if block.name == "compact": manual_compact = True output = "Compressing..." else: handler = TOOL_HANDLERS.get(block.name) try: output = handler(**block.input) if handler else f"Unknown tool: {block.name}" except Exception as e: output = f"Error: {e}" print(f"> {block.name}: {str(output)[:200]}") results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)}) messages.append({"role": "user", "content": results}) # Layer 3: manual compact triggered by the compact tool if manual_compact: print("[manual compact]") messages[:] = auto_compact(messages) if __name__ == "__main__": history = [] while True: try: query = input("\033[36ms06 >> \033[0m") except (EOFError, KeyboardInterrupt): break if query.strip().lower() in ("q", "exit", ""): break history.append({"role": "user", "content": query}) agent_loop(history) response_content = history[-1]["content"] if isinstance(response_content, list): for block in response_content: if hasattr(block, "text"): print(block.text) print() ================================================ FILE: agents/s07_task_system.py ================================================ #!/usr/bin/env python3 # Harness: persistent tasks -- goals that outlive any single conversation. """ s07_task_system.py - Tasks Tasks persist as JSON files in .tasks/ so they survive context compression. 
Each task has a dependency graph (blockedBy/blocks). .tasks/ task_1.json {"id":1, "subject":"...", "status":"completed", ...} task_2.json {"id":2, "blockedBy":[1], "status":"pending", ...} task_3.json {"id":3, "blockedBy":[2], "blocks":[], ...} Dependency resolution: +----------+ +----------+ +----------+ | task 1 | --> | task 2 | --> | task 3 | | complete | | blocked | | blocked | +----------+ +----------+ +----------+ | ^ +--- completing task 1 removes it from task 2's blockedBy Key insight: "State that survives compression -- because it's outside the conversation." """ import json import os import subprocess from pathlib import Path from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] TASKS_DIR = WORKDIR / ".tasks" SYSTEM = f"You are a coding agent at {WORKDIR}. Use task tools to plan and track work." 
# -- TaskManager: CRUD with dependency graph, persisted as JSON files --
class TaskManager:
    """File-backed task store with a blockedBy/blocks dependency graph.

    Every task is one JSON document at ``<tasks_dir>/task_<id>.json`` holding
    ``id``, ``subject``, ``description``, ``status``, ``blockedBy``, ``blocks``
    and ``owner``. New ids continue after the highest id already on disk.
    """

    def __init__(self, tasks_dir: Path):
        self.dir = tasks_dir
        # parents=True so a nested tasks directory can be created in one call.
        self.dir.mkdir(parents=True, exist_ok=True)
        self._next_id = self._max_id() + 1

    @staticmethod
    def _id_from_path(path: Path):
        """Return the integer id encoded in a ``task_<id>.json`` name, else None."""
        try:
            return int(path.stem.split("_")[1])
        except (IndexError, ValueError):
            return None

    def _task_files(self) -> list:
        """Return ``(id, path)`` pairs for well-formed task files, ordered by id.

        Ordering is numeric: a plain sort of file names would put
        ``task_10.json`` ahead of ``task_2.json``.
        """
        pairs = [(self._id_from_path(p), p) for p in self.dir.glob("task_*.json")]
        return sorted((pair for pair in pairs if pair[0] is not None),
                      key=lambda pair: pair[0])

    def _max_id(self) -> int:
        """Highest task id present on disk, or 0 when there are no tasks."""
        ids = [task_id for task_id, _ in self._task_files()]
        return max(ids) if ids else 0

    def _load(self, task_id: int) -> dict:
        """Read one task from disk. Raises ValueError if the file is missing."""
        path = self.dir / f"task_{task_id}.json"
        if not path.exists():
            raise ValueError(f"Task {task_id} not found")
        return json.loads(path.read_text())

    def _save(self, task: dict):
        """Write ``task`` to the file named after its ``id``."""
        path = self.dir / f"task_{task['id']}.json"
        path.write_text(json.dumps(task, indent=2))

    @staticmethod
    def _merge_ids(existing: list, extra: list) -> list:
        """Concatenate two id lists, dropping duplicates but keeping order."""
        return list(dict.fromkeys(existing + extra))

    def _link(self, other_ids: list, field: str, task_id: int):
        """Append ``task_id`` to ``field`` of every task named in ``other_ids``.

        Ids that do not load are skipped on purpose (best effort): the edge is
        still recorded on the task being updated.
        """
        for other_id in other_ids:
            try:
                other = self._load(other_id)
            except ValueError:
                continue
            edges = other.setdefault(field, [])
            if task_id not in edges:
                edges.append(task_id)
                self._save(other)

    def create(self, subject: str, description: str = "") -> str:
        """Create a pending task and return it as pretty-printed JSON."""
        task = {
            "id": self._next_id,
            "subject": subject,
            "description": description,
            "status": "pending",
            "blockedBy": [],
            "blocks": [],
            "owner": "",
        }
        self._save(task)
        self._next_id += 1
        return json.dumps(task, indent=2)

    def get(self, task_id: int) -> str:
        """Return one task as pretty-printed JSON. Raises ValueError if unknown."""
        return json.dumps(self._load(task_id), indent=2)

    def update(self, task_id: int, status: str = None,
               add_blocked_by: list = None, add_blocks: list = None) -> str:
        """Change a task's status and/or add dependency edges.

        Args:
            task_id: id of the task to modify.
            status: one of "pending", "in_progress", "completed".
            add_blocked_by: ids of tasks that must finish before this one.
            add_blocks: ids of tasks that wait on this one.

        Returns:
            The updated task as pretty-printed JSON.

        Raises:
            ValueError: unknown ``task_id`` or invalid ``status``.
        """
        task = self._load(task_id)
        if status:
            if status not in ("pending", "in_progress", "completed"):
                raise ValueError(f"Invalid status: {status}")
            task["status"] = status
            # When a task is completed, remove it from all other tasks' blockedBy
            if status == "completed":
                self._clear_dependency(task_id)
        if add_blocked_by:
            task["blockedBy"] = self._merge_ids(task["blockedBy"], add_blocked_by)
            # Bidirectional: the blockers record this task in their blocks lists.
            self._link(add_blocked_by, "blocks", task_id)
        if add_blocks:
            task["blocks"] = self._merge_ids(task["blocks"], add_blocks)
            # Bidirectional: also update the blocked tasks' blockedBy lists
            self._link(add_blocks, "blockedBy", task_id)
        self._save(task)
        return json.dumps(task, indent=2)

    def _clear_dependency(self, completed_id: int):
        """Remove completed_id from all other tasks' blockedBy lists."""
        for f in self.dir.glob("task_*.json"):
            task = json.loads(f.read_text())
            if completed_id in task.get("blockedBy", []):
                task["blockedBy"].remove(completed_id)
                self._save(task)

    def list_all(self) -> str:
        """Return a one-line-per-task summary in ascending id order."""
        tasks = [json.loads(path.read_text()) for _, path in self._task_files()]
        if not tasks:
            return "No tasks."
        markers = {"pending": "[ ]", "in_progress": "[>]", "completed": "[x]"}
        lines = []
        for t in tasks:
            marker = markers.get(t["status"], "[?]")
            blocked = f" (blocked by: {t['blockedBy']})" if t.get("blockedBy") else ""
            lines.append(f"{marker} #{t['id']}: {t['subject']}{blocked}")
        return "\n".join(lines)
def run_write(path: str, content: str) -> str:
    """Write ``content`` to ``path`` inside the workspace.

    Missing parent directories are created. Returns "Wrote N bytes", where N
    is ``len(content)``, or "Error: ..." when ``safe_path`` rejects the path
    or the write fails.
    """
    try:
        target = safe_path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content)
        size = len(content)
        return f"Wrote {size} bytes"
    except Exception as exc:
        return f"Error: {exc}"


def run_edit(path: str, old_text: str, new_text: str) -> str:
    """Replace the first occurrence of ``old_text`` in the file at ``path``.

    Returns "Edited <path>" on success, "Error: Text not found in <path>" when
    ``old_text`` does not occur, or "Error: ..." for any other failure.
    """
    try:
        target = safe_path(path)
        current = target.read_text()
        if old_text not in current:
            return f"Error: Text not found in {path}"
        # count=1: only the first match is rewritten.
        updated = current.replace(old_text, new_text, 1)
        target.write_text(updated)
        return f"Edited {path}"
    except Exception as exc:
        return f"Error: {exc}"
def agent_loop(messages: list):
    """Drive the model until it stops asking for tools.

    Each round sends ``messages`` to the model, appends the assistant reply,
    runs every requested tool through ``TOOL_HANDLERS`` and appends the tool
    results as the next user message. ``messages`` is mutated in place; the
    function returns None once ``stop_reason`` is not "tool_use".
    """
    while True:
        reply = client.messages.create(
            model=MODEL,
            system=SYSTEM,
            messages=messages,
            tools=TOOLS,
            max_tokens=8000,
        )
        messages.append({"role": "assistant", "content": reply.content})
        if reply.stop_reason != "tool_use":
            return

        tool_results = []
        for item in reply.content:
            if item.type != "tool_use":
                continue
            tool = TOOL_HANDLERS.get(item.name)
            try:
                if tool:
                    outcome = tool(**item.input)
                else:
                    outcome = f"Unknown tool: {item.name}"
            except Exception as err:
                # A failing tool becomes a tool result instead of ending the loop.
                outcome = f"Error: {err}"
            print(f"> {item.name}: {str(outcome)[:200]}")
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": item.id,
                "content": str(outcome),
            })
        messages.append({"role": "user", "content": tool_results})
block in response_content: if hasattr(block, "text"): print(block.text) print() ================================================ FILE: agents/s08_background_tasks.py ================================================ #!/usr/bin/env python3 # Harness: background execution -- the model thinks while the harness waits. """ s08_background_tasks.py - Background Tasks Run commands in background threads. A notification queue is drained before each LLM call to deliver results. Main thread Background thread +-----------------+ +-----------------+ | agent loop | | task executes | | ... | | ... | | [LLM call] <---+------- | enqueue(result) | | ^drain queue | +-----------------+ +-----------------+ Timeline: Agent ----[spawn A]----[spawn B]----[other work]---- | | v v [A runs] [B runs] (parallel) | | +-- notification queue --> [results injected] Key insight: "Fire and forget -- the agent doesn't block while the command runs." """ import os import subprocess import threading import uuid from pathlib import Path from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] SYSTEM = f"You are a coding agent at {WORKDIR}. Use background_run for long-running commands." 
# -- BackgroundManager: threaded execution + notification queue --
class BackgroundManager:
    """Run shell commands in daemon threads and queue completion notices.

    ``tasks`` maps task_id -> {status, result, command}. Worker threads write
    to it while the agent thread reads it, so every access to ``tasks`` and to
    the notification queue goes through ``_lock``.
    """

    def __init__(self):
        self.tasks = {}  # task_id -> {status, result, command}
        self._notification_queue = []  # completed task results
        self._lock = threading.Lock()

    def run(self, command: str) -> str:
        """Start a background thread, return task_id immediately."""
        task_id = str(uuid.uuid4())[:8]
        with self._lock:
            self.tasks[task_id] = {"status": "running", "result": None, "command": command}
        thread = threading.Thread(
            target=self._execute, args=(task_id, command), daemon=True
        )
        thread.start()
        return f"Background task {task_id} started: {command[:80]}"

    def _execute(self, task_id: str, command: str):
        """Thread target: run subprocess, capture output, push to queue."""
        try:
            r = subprocess.run(
                command, shell=True, cwd=WORKDIR,
                capture_output=True, text=True, timeout=300
            )
            output = (r.stdout + r.stderr).strip()[:50000]
            status = "completed"
        except subprocess.TimeoutExpired:
            output = "Error: Timeout (300s)"
            status = "timeout"
        except Exception as e:
            output = f"Error: {e}"
            status = "error"
        result = output or "(no output)"
        # Publish result, status and notification in one critical section so a
        # reader never sees a finished status paired with a missing result.
        with self._lock:
            record = self.tasks[task_id]
            record["result"] = result
            record["status"] = status
            self._notification_queue.append({
                "task_id": task_id,
                "status": status,
                "command": command[:80],
                "result": result[:500],
            })

    def check(self, task_id: str = None) -> str:
        """Check status of one task or list all."""
        with self._lock:
            # Copy under the lock; format outside it.
            snapshot = {tid: dict(t) for tid, t in self.tasks.items()}
        if task_id:
            t = snapshot.get(task_id)
            if not t:
                return f"Error: Unknown task {task_id}"
            return f"[{t['status']}] {t['command'][:60]}\n{t.get('result') or '(running)'}"
        lines = []
        for tid, t in snapshot.items():
            lines.append(f"{tid}: [{t['status']}] {t['command'][:60]}")
        return "\n".join(lines) if lines else "No background tasks."

    def drain_notifications(self) -> list:
        """Return and clear all pending completion notifications."""
        with self._lock:
            notifs = list(self._notification_queue)
            self._notification_queue.clear()
        return notifs
def run_write(path: str, content: str) -> str:
    """Create or overwrite a workspace file with ``content``.

    Parent directories are created as needed. The success message reports
    ``len(content)``; every failure is returned as "Error: <reason>".
    """
    try:
        destination = safe_path(path)
        folder = destination.parent
        folder.mkdir(parents=True, exist_ok=True)
        destination.write_text(content)
    except Exception as problem:
        return f"Error: {problem}"
    return f"Wrote {len(content)} bytes"


def run_edit(path: str, old_text: str, new_text: str) -> str:
    """Swap the first exact match of ``old_text`` for ``new_text`` in a file.

    Returns "Edited <path>", or an "Error: ..." string when the text is not
    present or the file cannot be read or written.
    """
    try:
        destination = safe_path(path)
        body = destination.read_text()
        if old_text in body:
            destination.write_text(body.replace(old_text, new_text, 1))
            return f"Edited {path}"
        return f"Error: Text not found in {path}"
    except Exception as problem:
        return f"Error: {problem}"
def agent_loop(messages: list):
    """Agent loop that delivers background-task results before each model call.

    At the top of every round the notification queue of ``BG`` is drained. If
    anything finished and ``messages`` is non-empty, one user message listing
    the results and one assistant acknowledgement are appended. The rest is
    the usual cycle: call the model, run requested tools, append their
    results, and return once ``stop_reason`` is not "tool_use".
    """
    while True:
        finished = BG.drain_notifications()
        if finished and messages:
            report_lines = []
            for note in finished:
                report_lines.append(
                    f"[bg:{note['task_id']}] {note['status']}: {note['result']}"
                )
            notif_text = "\n".join(report_lines)
            messages.append({"role": "user", "content": f"\n{notif_text}\n"})
            messages.append({"role": "assistant", "content": "Noted background results."})

        reply = client.messages.create(
            model=MODEL,
            system=SYSTEM,
            messages=messages,
            tools=TOOLS,
            max_tokens=8000,
        )
        messages.append({"role": "assistant", "content": reply.content})
        if reply.stop_reason != "tool_use":
            return

        tool_results = []
        for item in reply.content:
            if item.type != "tool_use":
                continue
            tool = TOOL_HANDLERS.get(item.name)
            try:
                if tool:
                    outcome = tool(**item.input)
                else:
                    outcome = f"Unknown tool: {item.name}"
            except Exception as err:
                outcome = f"Error: {err}"
            print(f"> {item.name}: {str(outcome)[:200]}")
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": item.id,
                "content": str(outcome),
            })
        messages.append({"role": "user", "content": tool_results})
Harness: team mailboxes -- multiple models, coordinated through files. """ s09_agent_teams.py - Agent Teams Persistent named agents with file-based JSONL inboxes. Each teammate runs its own agent loop in a separate thread. Communication via append-only inboxes. Subagent (s04): spawn -> execute -> return summary -> destroyed Teammate (s09): spawn -> work -> idle -> work -> ... -> shutdown .team/config.json .team/inbox/ +----------------------------+ +------------------+ | {"team_name": "default", | | alice.jsonl | | "members": [ | | bob.jsonl | | {"name":"alice", | | lead.jsonl | | "role":"coder", | +------------------+ | "status":"idle"} | | ]} | send_message("alice", "fix bug"): +----------------------------+ open("alice.jsonl", "a").write(msg) read_inbox("alice"): spawn_teammate("alice","coder",...) msgs = [json.loads(l) for l in ...] | open("alice.jsonl", "w").close() v return msgs # drain Thread: alice Thread: bob +------------------+ +------------------+ | agent_loop | | agent_loop | | status: working | | status: idle | | ... runs tools | | ... waits ... | | status -> idle | | | +------------------+ +------------------+ 5 message types (all declared, not all handled here): +-------------------------+-----------------------------------+ | message | Normal text message | | broadcast | Sent to all teammates | | shutdown_request | Request graceful shutdown (s10) | | shutdown_response | Approve/reject shutdown (s10) | | plan_approval_response | Approve/reject plan (s10) | +-------------------------+-----------------------------------+ Key insight: "Teammates that can talk to each other." 
# -- MessageBus: JSONL inbox per teammate --
class MessageBus:
    """Append-only JSONL inboxes, one ``<name>.jsonl`` file per recipient.

    ``send`` appends a JSON line and ``read_inbox`` returns and empties the
    file. The bus is shared by the lead and by teammate threads, so both
    operations hold ``_lock``: without it a message appended between the read
    and the truncate inside ``read_inbox`` would be erased unseen.
    """

    def __init__(self, inbox_dir: Path):
        self.dir = inbox_dir
        self.dir.mkdir(parents=True, exist_ok=True)
        self._lock = threading.Lock()

    def send(self, sender: str, to: str, content: str,
             msg_type: str = "message", extra: dict = None) -> str:
        """Append one message to ``to``'s inbox.

        Args:
            sender: name stored in the message's "from" field.
            to: recipient; selects the ``<to>.jsonl`` file.
            content: message text.
            msg_type: must be a member of ``VALID_MSG_TYPES``.
            extra: optional extra keys merged into the message.

        Returns:
            "Sent <type> to <to>", or an "Error: ..." string for a bad type.
        """
        if msg_type not in VALID_MSG_TYPES:
            return f"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}"
        msg = {
            "type": msg_type,
            "from": sender,
            "content": content,
            "timestamp": time.time(),
        }
        if extra:
            msg.update(extra)
        line = json.dumps(msg) + "\n"
        inbox_path = self.dir / f"{to}.jsonl"
        with self._lock:
            with open(inbox_path, "a") as f:
                f.write(line)
        return f"Sent {msg_type} to {to}"

    def read_inbox(self, name: str) -> list:
        """Return every queued message for ``name`` and empty the inbox.

        Blank or unparseable lines are dropped rather than raised: raising
        would happen before the truncate, leaving the bad line in place and
        making every later read fail the same way.
        """
        inbox_path = self.dir / f"{name}.jsonl"
        with self._lock:
            if not inbox_path.exists():
                return []
            raw = inbox_path.read_text()
            inbox_path.write_text("")
        messages = []
        for line in raw.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                messages.append(json.loads(line))
            except json.JSONDecodeError:
                continue
        return messages

    def broadcast(self, sender: str, content: str, teammates: list) -> str:
        """Send ``content`` as a "broadcast" to every teammate except ``sender``."""
        count = 0
        for name in teammates:
            if name != sender:
                self.send(sender, name, content, "broadcast")
                count += 1
        return f"Broadcast to {count} teammates"
self.dir / "config.json" self.config = self._load_config() self.threads = {} def _load_config(self) -> dict: if self.config_path.exists(): return json.loads(self.config_path.read_text()) return {"team_name": "default", "members": []} def _save_config(self): self.config_path.write_text(json.dumps(self.config, indent=2)) def _find_member(self, name: str) -> dict: for m in self.config["members"]: if m["name"] == name: return m return None def spawn(self, name: str, role: str, prompt: str) -> str: member = self._find_member(name) if member: if member["status"] not in ("idle", "shutdown"): return f"Error: '{name}' is currently {member['status']}" member["status"] = "working" member["role"] = role else: member = {"name": name, "role": role, "status": "working"} self.config["members"].append(member) self._save_config() thread = threading.Thread( target=self._teammate_loop, args=(name, role, prompt), daemon=True, ) self.threads[name] = thread thread.start() return f"Spawned '{name}' (role: {role})" def _teammate_loop(self, name: str, role: str, prompt: str): sys_prompt = ( f"You are '{name}', role: {role}, at {WORKDIR}. " f"Use send_message to communicate. Complete your task." 
) messages = [{"role": "user", "content": prompt}] tools = self._teammate_tools() for _ in range(50): inbox = BUS.read_inbox(name) for msg in inbox: messages.append({"role": "user", "content": json.dumps(msg)}) try: response = client.messages.create( model=MODEL, system=sys_prompt, messages=messages, tools=tools, max_tokens=8000, ) except Exception: break messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": break results = [] for block in response.content: if block.type == "tool_use": output = self._exec(name, block.name, block.input) print(f" [{name}] {block.name}: {str(output)[:120]}") results.append({ "type": "tool_result", "tool_use_id": block.id, "content": str(output), }) messages.append({"role": "user", "content": results}) member = self._find_member(name) if member and member["status"] != "shutdown": member["status"] = "idle" self._save_config() def _exec(self, sender: str, tool_name: str, args: dict) -> str: # these base tools are unchanged from s02 if tool_name == "bash": return _run_bash(args["command"]) if tool_name == "read_file": return _run_read(args["path"]) if tool_name == "write_file": return _run_write(args["path"], args["content"]) if tool_name == "edit_file": return _run_edit(args["path"], args["old_text"], args["new_text"]) if tool_name == "send_message": return BUS.send(sender, args["to"], args["content"], args.get("msg_type", "message")) if tool_name == "read_inbox": return json.dumps(BUS.read_inbox(sender), indent=2) return f"Unknown tool: {tool_name}" def _teammate_tools(self) -> list: # these base tools are unchanged from s02 return [ {"name": "bash", "description": "Run a shell command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, {"name": "read_file", "description": "Read file contents.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}}, {"name": "write_file", 
# -- Base tool implementations (these base tools are unchanged from s02) --
def _safe_path(p: str) -> Path:
    """Resolve ``p`` against WORKDIR and refuse anything outside it.

    Raises ValueError when the resolved path is not under WORKDIR.
    """
    resolved = (WORKDIR / p).resolve()
    if resolved.is_relative_to(WORKDIR):
        return resolved
    raise ValueError(f"Path escapes workspace: {p}")


def _run_bash(command: str) -> str:
    """Run ``command`` through the shell in WORKDIR and return its output.

    Commands containing any blocked substring are refused. stdout and stderr
    are concatenated, stripped and capped at 50000 characters; an empty result
    is reported as "(no output)". A run longer than 120 seconds returns a
    timeout error string.
    """
    for fragment in ("rm -rf /", "sudo", "shutdown", "reboot"):
        if fragment in command:
            return "Error: Dangerous command blocked"
    try:
        proc = subprocess.run(
            command,
            shell=True,
            cwd=WORKDIR,
            capture_output=True,
            text=True,
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        return "Error: Timeout (120s)"
    combined = (proc.stdout + proc.stderr).strip()
    if not combined:
        return "(no output)"
    return combined[:50000]


def _run_read(path: str, limit: int = None) -> str:
    """Return the text of a workspace file, optionally cut to ``limit`` lines.

    When the file is longer than ``limit`` a trailing "... (N more)" line
    states how many lines were dropped. Output is capped at 50000 characters;
    failures come back as "Error: ...".
    """
    try:
        rows = _safe_path(path).read_text().splitlines()
        total = len(rows)
        if limit and limit < total:
            rows = rows[:limit]
            rows.append(f"... ({total - limit} more)")
        text = "\n".join(rows)
        return text[:50000]
    except Exception as exc:
        return f"Error: {exc}"


def _run_write(path: str, content: str) -> str:
    """Write ``content`` to a workspace file, creating parent directories.

    Returns "Wrote N bytes" with N = ``len(content)``, or "Error: ...".
    """
    try:
        target = _safe_path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(content)
        size = len(content)
        return f"Wrote {size} bytes"
    except Exception as exc:
        return f"Error: {exc}"


def _run_edit(path: str, old_text: str, new_text: str) -> str:
    """Replace the first occurrence of ``old_text`` with ``new_text`` in a file.

    Returns "Edited <path>", "Error: Text not found in <path>", or another
    "Error: ..." string if reading or writing fails.
    """
    try:
        target = _safe_path(path)
        current = target.read_text()
        if old_text not in current:
            return f"Error: Text not found in {path}"
        updated = current.replace(old_text, new_text, 1)
        target.write_text(updated)
        return f"Edited {path}"
    except Exception as exc:
        return f"Error: {exc}"
def agent_loop(messages: list):
    """Lead agent loop: deliver inbox mail, call the model, run its tools.

    Every round first drains the "lead" inbox. If it held anything, the
    messages are appended as one user message (a JSON dump) followed by a
    fixed assistant acknowledgement. Then the model is called; requested
    tools are dispatched through ``TOOL_HANDLERS`` and their results appended.
    Returns None once ``stop_reason`` is not "tool_use".
    """
    while True:
        mail = BUS.read_inbox("lead")
        if mail:
            mail_message = {
                "role": "user",
                "content": f"{json.dumps(mail, indent=2)}",
            }
            ack_message = {
                "role": "assistant",
                "content": "Noted inbox messages.",
            }
            messages.append(mail_message)
            messages.append(ack_message)

        reply = client.messages.create(
            model=MODEL,
            system=SYSTEM,
            messages=messages,
            tools=TOOLS,
            max_tokens=8000,
        )
        messages.append({"role": "assistant", "content": reply.content})
        if reply.stop_reason != "tool_use":
            return

        tool_results = []
        for item in reply.content:
            if item.type != "tool_use":
                continue
            tool = TOOL_HANDLERS.get(item.name)
            try:
                if tool:
                    outcome = tool(**item.input)
                else:
                    outcome = f"Unknown tool: {item.name}"
            except Exception as err:
                outcome = f"Error: {err}"
            print(f"> {item.name}: {str(outcome)[:200]}")
            tool_results.append({
                "type": "tool_result",
                "tool_use_id": item.id,
                "content": str(outcome),
            })
        messages.append({"role": "user", "content": tool_results})
| +---------------------+ | +---------------------+ +-------v-------------+ | plan_approval_resp | <------- | plan_approval | | {approve: true} | | review: {req_id, | +---------------------+ | approve: true} | +---------------------+ Trackers: {request_id: {"target|from": name, "status": "pending|..."}} Key insight: "Same request_id correlation pattern, two domains." """ import json import os import subprocess import threading import time import uuid from pathlib import Path from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] TEAM_DIR = WORKDIR / ".team" INBOX_DIR = TEAM_DIR / "inbox" SYSTEM = f"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols." VALID_MSG_TYPES = { "message", "broadcast", "shutdown_request", "shutdown_response", "plan_approval_response", } # -- Request trackers: correlate by request_id -- shutdown_requests = {} plan_requests = {} _tracker_lock = threading.Lock() # -- MessageBus: JSONL inbox per teammate -- class MessageBus: def __init__(self, inbox_dir: Path): self.dir = inbox_dir self.dir.mkdir(parents=True, exist_ok=True) def send(self, sender: str, to: str, content: str, msg_type: str = "message", extra: dict = None) -> str: if msg_type not in VALID_MSG_TYPES: return f"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}" msg = { "type": msg_type, "from": sender, "content": content, "timestamp": time.time(), } if extra: msg.update(extra) inbox_path = self.dir / f"{to}.jsonl" with open(inbox_path, "a") as f: f.write(json.dumps(msg) + "\n") return f"Sent {msg_type} to {to}" def read_inbox(self, name: str) -> list: inbox_path = self.dir / f"{name}.jsonl" if not inbox_path.exists(): return [] messages = [] for line in inbox_path.read_text().strip().splitlines(): if line: messages.append(json.loads(line)) inbox_path.write_text("") return messages def broadcast(self, sender: str, content: str, teammates: list) -> str: count = 0 for name in teammates: if name != sender: self.send(sender, name, content, "broadcast") count += 1 return f"Broadcast to {count} teammates" BUS = MessageBus(INBOX_DIR) # -- TeammateManager with shutdown + plan approval -- class TeammateManager: def __init__(self, team_dir: Path): self.dir = team_dir self.dir.mkdir(exist_ok=True) self.config_path = self.dir / "config.json" self.config = self._load_config() self.threads = {} def _load_config(self) -> dict: if self.config_path.exists(): return json.loads(self.config_path.read_text()) return {"team_name": "default", "members": []} def _save_config(self): self.config_path.write_text(json.dumps(self.config, indent=2)) def _find_member(self, name: str) -> dict: for m in self.config["members"]: if m["name"] == name: return m return None def spawn(self, name: str, role: str, prompt: str) -> str: member = self._find_member(name) if member: if member["status"] not in ("idle", "shutdown"): return f"Error: '{name}' is currently {member['status']}" member["status"] = "working" member["role"] = role else: member = {"name": name, "role": role, "status": "working"} self.config["members"].append(member) self._save_config() thread = threading.Thread( target=self._teammate_loop, args=(name, role, prompt), daemon=True, ) self.threads[name] = thread thread.start() return f"Spawned '{name}' (role: {role})" def 
def _exec(self, sender: str, tool_name: str, args: dict) -> str:
    """Run one teammate tool call and return its result as text.

    Args:
        sender: Name of the teammate issuing the call; used as the message
            sender and as the inbox owner for ``read_inbox``.
        tool_name: Tool requested by the model.
        args: Tool input produced by the model.

    Returns:
        The tool output, ``"Unknown tool: <name>"`` for an unrecognised tool,
        or an ``"Error: ..."`` string when the call fails. Failures are
        returned rather than raised so the calling loop can feed them back to
        the model as a tool result, the same way the lead's dispatch does.
    """
    try:
        # these base tools are unchanged from s02
        if tool_name == "bash":
            return _run_bash(args["command"])
        if tool_name == "read_file":
            return _run_read(args["path"])
        if tool_name == "write_file":
            return _run_write(args["path"], args["content"])
        if tool_name == "edit_file":
            return _run_edit(args["path"], args["old_text"], args["new_text"])
        if tool_name == "send_message":
            return BUS.send(sender, args["to"], args["content"], args.get("msg_type", "message"))
        if tool_name == "read_inbox":
            return json.dumps(BUS.read_inbox(sender), indent=2)
        if tool_name == "shutdown_response":
            req_id = args["request_id"]
            approve = args["approve"]
            verdict = "approved" if approve else "rejected"
            with _tracker_lock:
                # Only update requests that are being tracked.
                if req_id in shutdown_requests:
                    shutdown_requests[req_id]["status"] = verdict
            BUS.send(
                sender, "lead", args.get("reason", ""),
                "shutdown_response", {"request_id": req_id, "approve": approve},
            )
            return f"Shutdown {verdict}"
        if tool_name == "plan_approval":
            plan_text = args.get("plan", "")
            req_id = str(uuid.uuid4())[:8]
            with _tracker_lock:
                plan_requests[req_id] = {"from": sender, "plan": plan_text, "status": "pending"}
            BUS.send(
                sender, "lead", plan_text, "plan_approval_response",
                {"request_id": req_id, "plan": plan_text},
            )
            return f"Plan submitted (request_id={req_id}). Waiting for lead approval."
        return f"Unknown tool: {tool_name}"
    except KeyError as e:
        # The model omitted an input field that the tool indexes directly.
        return f"Error: Missing required argument {e}"
    except Exception as e:
        return f"Error: {e}"
def _run_bash(command: str) -> str:
    """Run ``command`` through the shell in WORKDIR and return its output.

    Args:
        command: Shell command line to execute.

    Returns:
        Combined stdout and stderr, stripped and capped at 50000 characters;
        ``"(no output)"`` when the command printed nothing; or an
        ``"Error: ..."`` string when the command is blocked, times out, or
        cannot be started. This function does not raise for those cases.
    """
    # Plain substring match, not a sandbox: any command containing one of
    # these fragments anywhere is refused.
    dangerous = ["rm -rf /", "sudo", "shutdown", "reboot"]
    if any(d in command for d in dangerous):
        return "Error: Dangerous command blocked"
    try:
        r = subprocess.run(
            command, shell=True, cwd=WORKDIR,
            capture_output=True, text=True,
            # Undecodable output bytes would otherwise raise while decoding.
            errors="replace",
            timeout=120,
        )
    except subprocess.TimeoutExpired:
        return "Error: Timeout (120s)"
    except OSError as e:
        # e.g. the working directory no longer exists.
        return f"Error: {e}"
    out = (r.stdout + r.stderr).strip()
    return out[:50000] if out else "(no output)"
def handle_plan_review(request_id: str, approve: bool, feedback: str = "") -> str:
    """Record the lead's verdict on a submitted plan and notify its author.

    Args:
        request_id: Identifier of the plan request to review.
        approve: True to approve the plan, False to reject it.
        feedback: Optional text sent to the author along with the verdict.

    Returns:
        ``"Plan approved for '<author>'"`` or ``"Plan rejected for
        '<author>'"``, or an ``"Error: ..."`` string when ``request_id`` is
        not a tracked plan request.
    """
    verdict = "approved" if approve else "rejected"
    # Look up, update and read the record inside one critical section, so the
    # entry cannot change between the existence check and the status write.
    with _tracker_lock:
        req = plan_requests.get(request_id)
        if not req:
            return f"Error: Unknown plan request_id '{request_id}'"
        req["status"] = verdict
        author = req["from"]
    # Send outside the lock; the bus does file I/O.
    BUS.send(
        "lead", author, feedback, "plan_approval_response",
        {"request_id": request_id, "approve": approve, "feedback": feedback},
    )
    return f"Plan {verdict} for '{author}'"
TEAM.spawn(kw["name"], kw["role"], kw["prompt"]), "list_teammates": lambda **kw: TEAM.list_all(), "send_message": lambda **kw: BUS.send("lead", kw["to"], kw["content"], kw.get("msg_type", "message")), "read_inbox": lambda **kw: json.dumps(BUS.read_inbox("lead"), indent=2), "broadcast": lambda **kw: BUS.broadcast("lead", kw["content"], TEAM.member_names()), "shutdown_request": lambda **kw: handle_shutdown_request(kw["teammate"]), "shutdown_response": lambda **kw: _check_shutdown_status(kw.get("request_id", "")), "plan_approval": lambda **kw: handle_plan_review(kw["request_id"], kw["approve"], kw.get("feedback", "")), } # these base tools are unchanged from s02 TOOLS = [ {"name": "bash", "description": "Run a shell command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, {"name": "read_file", "description": "Read file contents.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, {"name": "write_file", "description": "Write content to file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, {"name": "edit_file", "description": "Replace exact text in file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, {"name": "spawn_teammate", "description": "Spawn a persistent teammate.", "input_schema": {"type": "object", "properties": {"name": {"type": "string"}, "role": {"type": "string"}, "prompt": {"type": "string"}}, "required": ["name", "role", "prompt"]}}, {"name": "list_teammates", "description": "List all teammates.", "input_schema": {"type": "object", "properties": {}}}, {"name": "send_message", "description": "Send a message to a teammate.", "input_schema": {"type": "object", "properties": 
def agent_loop(messages: list):
    """Drive the lead agent until the model stops requesting tools.

    Each iteration drains the lead's inbox into the transcript, asks the
    model for the next step, and executes any tool calls it returns.

    Args:
        messages: Conversation transcript; mutated in place. On return its
            last entry is the model's final assistant message.
    """
    while True:
        inbox = BUS.read_inbox("lead")
        if inbox:
            # Add the inbox as a user turn only. Following it with a synthetic
            # assistant acknowledgement would make the request end on an
            # assistant message, which the Messages API treats as a prefill
            # to continue rather than as a turn to answer.
            messages.append({
                "role": "user",
                "content": f"<inbox>{json.dumps(inbox, indent=2)}</inbox>",
            })
        response = client.messages.create(
            model=MODEL,
            system=SYSTEM,
            messages=messages,
            tools=TOOLS,
            max_tokens=8000,
        )
        messages.append({"role": "assistant", "content": response.content})
        if response.stop_reason != "tool_use":
            return
        results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            handler = TOOL_HANDLERS.get(block.name)
            try:
                output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
            except Exception as e:
                # Report the failure to the model instead of aborting the loop.
                output = f"Error: {e}"
            print(f"> {block.name}: {str(output)[:200]}")
            results.append({
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": str(output),
            })
        messages.append({"role": "user", "content": results})
""" import json import os import subprocess import threading import time import uuid from pathlib import Path from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] TEAM_DIR = WORKDIR / ".team" INBOX_DIR = TEAM_DIR / "inbox" TASKS_DIR = WORKDIR / ".tasks" POLL_INTERVAL = 5 IDLE_TIMEOUT = 60 SYSTEM = f"You are a team lead at {WORKDIR}. Teammates are autonomous -- they find work themselves." VALID_MSG_TYPES = { "message", "broadcast", "shutdown_request", "shutdown_response", "plan_approval_response", } # -- Request trackers -- shutdown_requests = {} plan_requests = {} _tracker_lock = threading.Lock() _claim_lock = threading.Lock() # -- MessageBus: JSONL inbox per teammate -- class MessageBus: def __init__(self, inbox_dir: Path): self.dir = inbox_dir self.dir.mkdir(parents=True, exist_ok=True) def send(self, sender: str, to: str, content: str, msg_type: str = "message", extra: dict = None) -> str: if msg_type not in VALID_MSG_TYPES: return f"Error: Invalid type '{msg_type}'. 
def claim_task(task_id: int, owner: str) -> str:
    """Assign task ``task_id`` to ``owner`` and mark it in progress.

    The check and the write happen under ``_claim_lock``, so of several
    callers racing for the same unowned task only the first one gets it.

    Args:
        task_id: Numeric id; selects ``TASKS_DIR/task_<task_id>.json``.
        owner: Name recorded as the task's owner.

    Returns:
        ``"Claimed task #<id> for <owner>"`` on success (including a repeat
        claim by the current owner), otherwise an ``"Error: ..."`` string when
        the task file is missing, the task belongs to someone else, or the
        task is already completed.
    """
    with _claim_lock:
        path = TASKS_DIR / f"task_{task_id}.json"
        if not path.exists():
            return f"Error: Task {task_id} not found"
        task = json.loads(path.read_text())
        # Callers scan the board without holding the lock, so the task may
        # have been taken since they looked. Re-check here before writing.
        current_owner = task.get("owner")
        if current_owner and current_owner != owner:
            return f"Error: Task {task_id} is already claimed by {current_owner}"
        if task.get("status") == "completed":
            return f"Error: Task {task_id} is already completed"
        task["owner"] = owner
        task["status"] = "in_progress"
        path.write_text(json.dumps(task, indent=2))
    return f"Claimed task #{task_id} for {owner}"
def _loop(self, name: str, role: str, prompt: str):
    """Thread body for one autonomous teammate: alternate WORK and IDLE phases.

    WORK: up to 50 model round-trips. Each one drains the inbox, calls the
    model, runs the requested tools, and stops early when the model answers
    without tools or calls the ``idle`` tool.
    IDLE: poll the inbox and the task board every POLL_INTERVAL seconds for
    up to IDLE_TIMEOUT seconds. A new message or a successfully claimed task
    resumes WORK; otherwise the teammate is marked "shutdown" and the
    function returns.

    A ``shutdown_request`` message read in either phase marks the teammate
    "shutdown" and returns immediately. A failing model call marks it "idle"
    and returns.

    Args:
        name: Teammate name; also its inbox name and the owner used for claims.
        role: Role text placed in the system prompt.
        prompt: Initial user message that starts the conversation.
    """
    team_name = self.config["team_name"]
    sys_prompt = (
        f"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. "
        f"Use idle tool when you have no more work. You will auto-claim new tasks."
    )
    messages = [{"role": "user", "content": prompt}]
    tools = self._teammate_tools()

    while True:
        # -- WORK PHASE: standard agent loop --
        for _ in range(50):
            for msg in BUS.read_inbox(name):
                if msg.get("type") == "shutdown_request":
                    self._set_status(name, "shutdown")
                    return
                messages.append({"role": "user", "content": json.dumps(msg)})
            try:
                response = client.messages.create(
                    model=MODEL,
                    system=sys_prompt,
                    messages=messages,
                    tools=tools,
                    max_tokens=8000,
                )
            except Exception:
                self._set_status(name, "idle")
                return
            messages.append({"role": "assistant", "content": response.content})
            if response.stop_reason != "tool_use":
                break
            results = []
            idle_requested = False
            for block in response.content:
                if block.type != "tool_use":
                    continue
                if block.name == "idle":
                    idle_requested = True
                    output = "Entering idle phase. Will poll for new tasks."
                else:
                    try:
                        output = self._exec(name, block.name, block.input)
                    except Exception as e:
                        # An uncaught tool error would end this thread while
                        # the recorded status still says "working". Report it
                        # to the model as the tool result instead.
                        output = f"Error: {e}"
                print(f" [{name}] {block.name}: {str(output)[:120]}")
                results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": str(output),
                })
            messages.append({"role": "user", "content": results})
            if idle_requested:
                break

        # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --
        self._set_status(name, "idle")
        resume = False
        polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)
        for _ in range(polls):
            time.sleep(POLL_INTERVAL)
            inbox = BUS.read_inbox(name)
            if inbox:
                for msg in inbox:
                    if msg.get("type") == "shutdown_request":
                        self._set_status(name, "shutdown")
                        return
                    messages.append({"role": "user", "content": json.dumps(msg)})
                resume = True
                break
            # Take the first task whose claim succeeds. The scan runs without
            # the claim lock, so a listed task may be gone or already taken by
            # the time it is claimed.
            task = None
            for candidate in scan_unclaimed_tasks():
                outcome = claim_task(candidate["id"], name)
                if not str(outcome).startswith("Error"):
                    task = candidate
                    break
            if task is None:
                continue
            task_prompt = (
                f"Task #{task['id']}: {task['subject']}\n"
                f"{task.get('description', '')}\n"
                f"Task #{task['id']} is now claimed for {name}. Work on it."
            )
            if len(messages) <= 3:
                # Short transcript: put the identity exchange back at the front.
                messages.insert(0, make_identity_block(name, role, team_name))
                messages.insert(1, {"role": "assistant", "content": f"I am {name}. Continuing."})
            # End on the user turn. A synthetic assistant turn here would be
            # the last message of the next request, which the Messages API
            # continues as a prefill.
            messages.append({"role": "user", "content": task_prompt})
            resume = True
            break

        if not resume:
            self._set_status(name, "shutdown")
            return
        self._set_status(name, "working")
if tool_name == "claim_task": return claim_task(args["task_id"], sender) return f"Unknown tool: {tool_name}" def _teammate_tools(self) -> list: # these base tools are unchanged from s02 return [ {"name": "bash", "description": "Run a shell command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, {"name": "read_file", "description": "Read file contents.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}}, {"name": "write_file", "description": "Write content to file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, {"name": "edit_file", "description": "Replace exact text in file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, {"name": "send_message", "description": "Send message to a teammate.", "input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}, "msg_type": {"type": "string", "enum": list(VALID_MSG_TYPES)}}, "required": ["to", "content"]}}, {"name": "read_inbox", "description": "Read and drain your inbox.", "input_schema": {"type": "object", "properties": {}}}, {"name": "shutdown_response", "description": "Respond to a shutdown request.", "input_schema": {"type": "object", "properties": {"request_id": {"type": "string"}, "approve": {"type": "boolean"}, "reason": {"type": "string"}}, "required": ["request_id", "approve"]}}, {"name": "plan_approval", "description": "Submit a plan for lead approval.", "input_schema": {"type": "object", "properties": {"plan": {"type": "string"}}, "required": ["plan"]}}, {"name": "idle", "description": "Signal that you have no more work. 
def _safe_path(p: str) -> Path:
    """Resolve ``p`` against WORKDIR, refusing anything outside the workspace.

    Args:
        p: Path supplied by a tool call, interpreted relative to WORKDIR.

    Returns:
        The resolved path.

    Raises:
        ValueError: If the resolved path is not located under WORKDIR.
    """
    resolved = WORKDIR.joinpath(p).resolve()
    if resolved.is_relative_to(WORKDIR):
        return resolved
    raise ValueError(f"Path escapes workspace: {p}")
def _run_edit(path: str, old_text: str, new_text: str) -> str:
    """Replace the first occurrence of ``old_text`` in a workspace file.

    Args:
        path: File path, validated through ``_safe_path``.
        old_text: Exact text to look for; must not be empty.
        new_text: Text written in place of the first match.

    Returns:
        ``"Edited <path>"`` on success, otherwise an ``"Error: ..."`` string
        (empty ``old_text``, text not found, or any failure while resolving,
        reading or writing the file). This function does not raise.
    """
    if not old_text:
        # The empty string is "found" in every file and would silently
        # prepend new_text. Reject it explicitly.
        return "Error: old_text must not be empty"
    try:
        fp = _safe_path(path)
        before, match, after = fp.read_text().partition(old_text)
        if not match:
            return f"Error: Text not found in {path}"
        fp.write_text(before + new_text + after)
        return f"Edited {path}"
    except Exception as e:
        return f"Error: {e}"
kw["prompt"]), "list_teammates": lambda **kw: TEAM.list_all(), "send_message": lambda **kw: BUS.send("lead", kw["to"], kw["content"], kw.get("msg_type", "message")), "read_inbox": lambda **kw: json.dumps(BUS.read_inbox("lead"), indent=2), "broadcast": lambda **kw: BUS.broadcast("lead", kw["content"], TEAM.member_names()), "shutdown_request": lambda **kw: handle_shutdown_request(kw["teammate"]), "shutdown_response": lambda **kw: _check_shutdown_status(kw.get("request_id", "")), "plan_approval": lambda **kw: handle_plan_review(kw["request_id"], kw["approve"], kw.get("feedback", "")), "idle": lambda **kw: "Lead does not idle.", "claim_task": lambda **kw: claim_task(kw["task_id"], "lead"), } # these base tools are unchanged from s02 TOOLS = [ {"name": "bash", "description": "Run a shell command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, {"name": "read_file", "description": "Read file contents.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, {"name": "write_file", "description": "Write content to file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, {"name": "edit_file", "description": "Replace exact text in file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, {"name": "spawn_teammate", "description": "Spawn an autonomous teammate.", "input_schema": {"type": "object", "properties": {"name": {"type": "string"}, "role": {"type": "string"}, "prompt": {"type": "string"}}, "required": ["name", "role", "prompt"]}}, {"name": "list_teammates", "description": "List all teammates.", "input_schema": {"type": "object", "properties": {}}}, {"name": "send_message", "description": "Send a 
def agent_loop(messages: list):
    """Run the lead's model/tool cycle until the model answers without tools.

    Every pass first moves pending inbox messages into the transcript, then
    requests the next model step and executes whatever tools it asks for.

    Args:
        messages: Conversation transcript; appended to in place. When the
            function returns, the last entry is the final assistant message.
    """
    while True:
        pending = BUS.read_inbox("lead")
        if pending:
            # Only a user turn is added. A fabricated assistant reply after it
            # would become the final message of the request below, and the
            # Messages API continues a trailing assistant message as a
            # prefill instead of answering the conversation.
            messages.append({
                "role": "user",
                "content": f"<inbox>{json.dumps(pending, indent=2)}</inbox>",
            })
        response = client.messages.create(
            model=MODEL,
            system=SYSTEM,
            messages=messages,
            tools=TOOLS,
            max_tokens=8000,
        )
        messages.append({"role": "assistant", "content": response.content})
        if response.stop_reason != "tool_use":
            return
        results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            handler = TOOL_HANDLERS.get(block.name)
            try:
                output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
            except Exception as e:
                # Handler failures are reported back to the model as text.
                output = f"Error: {e}"
            print(f"> {block.name}: {str(output)[:200]}")
            results.append({
                "type": "tool_result",
                "tool_use_id": block.id,
                "content": str(output),
            })
        messages.append({"role": "user", "content": results})
.tasks/task_12.json { "id": 12, "subject": "Implement auth refactor", "status": "in_progress", "worktree": "auth-refactor" } .worktrees/index.json { "worktrees": [ { "name": "auth-refactor", "path": ".../.worktrees/auth-refactor", "branch": "wt/auth-refactor", "task_id": 12, "status": "active" } ] } Key insight: "Isolate by directory, coordinate by task ID." """ import json import os import re import subprocess import time from pathlib import Path from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] def detect_repo_root(cwd: Path) -> Path | None: """Return git repo root if cwd is inside a repo, else None.""" try: r = subprocess.run( ["git", "rev-parse", "--show-toplevel"], cwd=cwd, capture_output=True, text=True, timeout=10, ) if r.returncode != 0: return None root = Path(r.stdout.strip()) return root if root.exists() else None except Exception: return None REPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR SYSTEM = ( f"You are a coding agent at {WORKDIR}. " "Use task + worktree tools for multi-task work. " "For parallel or risky changes: create tasks, allocate worktree lanes, " "run commands in those lanes, then choose keep/remove for closeout. " "Use worktree_events when you need lifecycle visibility." 
) # -- EventBus: append-only lifecycle events for observability -- class EventBus: def __init__(self, event_log_path: Path): self.path = event_log_path self.path.parent.mkdir(parents=True, exist_ok=True) if not self.path.exists(): self.path.write_text("") def emit( self, event: str, task: dict | None = None, worktree: dict | None = None, error: str | None = None, ): payload = { "event": event, "ts": time.time(), "task": task or {}, "worktree": worktree or {}, } if error: payload["error"] = error with self.path.open("a", encoding="utf-8") as f: f.write(json.dumps(payload) + "\n") def list_recent(self, limit: int = 20) -> str: n = max(1, min(int(limit or 20), 200)) lines = self.path.read_text(encoding="utf-8").splitlines() recent = lines[-n:] items = [] for line in recent: try: items.append(json.loads(line)) except Exception: items.append({"event": "parse_error", "raw": line}) return json.dumps(items, indent=2) # -- TaskManager: persistent task board with optional worktree binding -- class TaskManager: def __init__(self, tasks_dir: Path): self.dir = tasks_dir self.dir.mkdir(parents=True, exist_ok=True) self._next_id = self._max_id() + 1 def _max_id(self) -> int: ids = [] for f in self.dir.glob("task_*.json"): try: ids.append(int(f.stem.split("_")[1])) except Exception: pass return max(ids) if ids else 0 def _path(self, task_id: int) -> Path: return self.dir / f"task_{task_id}.json" def _load(self, task_id: int) -> dict: path = self._path(task_id) if not path.exists(): raise ValueError(f"Task {task_id} not found") return json.loads(path.read_text()) def _save(self, task: dict): self._path(task["id"]).write_text(json.dumps(task, indent=2)) def create(self, subject: str, description: str = "") -> str: task = { "id": self._next_id, "subject": subject, "description": description, "status": "pending", "owner": "", "worktree": "", "blockedBy": [], "created_at": time.time(), "updated_at": time.time(), } self._save(task) self._next_id += 1 return json.dumps(task, indent=2) 
def get(self, task_id: int) -> str: return json.dumps(self._load(task_id), indent=2) def exists(self, task_id: int) -> bool: return self._path(task_id).exists() def update(self, task_id: int, status: str = None, owner: str = None) -> str: task = self._load(task_id) if status: if status not in ("pending", "in_progress", "completed"): raise ValueError(f"Invalid status: {status}") task["status"] = status if owner is not None: task["owner"] = owner task["updated_at"] = time.time() self._save(task) return json.dumps(task, indent=2) def bind_worktree(self, task_id: int, worktree: str, owner: str = "") -> str: task = self._load(task_id) task["worktree"] = worktree if owner: task["owner"] = owner if task["status"] == "pending": task["status"] = "in_progress" task["updated_at"] = time.time() self._save(task) return json.dumps(task, indent=2) def unbind_worktree(self, task_id: int) -> str: task = self._load(task_id) task["worktree"] = "" task["updated_at"] = time.time() self._save(task) return json.dumps(task, indent=2) def list_all(self) -> str: tasks = [] for f in sorted(self.dir.glob("task_*.json")): tasks.append(json.loads(f.read_text())) if not tasks: return "No tasks." 
lines = [] for t in tasks: marker = { "pending": "[ ]", "in_progress": "[>]", "completed": "[x]", }.get(t["status"], "[?]") owner = f" owner={t['owner']}" if t.get("owner") else "" wt = f" wt={t['worktree']}" if t.get("worktree") else "" lines.append(f"{marker} #{t['id']}: {t['subject']}{owner}{wt}") return "\n".join(lines) TASKS = TaskManager(REPO_ROOT / ".tasks") EVENTS = EventBus(REPO_ROOT / ".worktrees" / "events.jsonl") # -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index -- class WorktreeManager: def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus): self.repo_root = repo_root self.tasks = tasks self.events = events self.dir = repo_root / ".worktrees" self.dir.mkdir(parents=True, exist_ok=True) self.index_path = self.dir / "index.json" if not self.index_path.exists(): self.index_path.write_text(json.dumps({"worktrees": []}, indent=2)) self.git_available = self._is_git_repo() def _is_git_repo(self) -> bool: try: r = subprocess.run( ["git", "rev-parse", "--is-inside-work-tree"], cwd=self.repo_root, capture_output=True, text=True, timeout=10, ) return r.returncode == 0 except Exception: return False def _run_git(self, args: list[str]) -> str: if not self.git_available: raise RuntimeError("Not in a git repository. 
worktree tools require git.") r = subprocess.run( ["git", *args], cwd=self.repo_root, capture_output=True, text=True, timeout=120, ) if r.returncode != 0: msg = (r.stdout + r.stderr).strip() raise RuntimeError(msg or f"git {' '.join(args)} failed") return (r.stdout + r.stderr).strip() or "(no output)" def _load_index(self) -> dict: return json.loads(self.index_path.read_text()) def _save_index(self, data: dict): self.index_path.write_text(json.dumps(data, indent=2)) def _find(self, name: str) -> dict | None: idx = self._load_index() for wt in idx.get("worktrees", []): if wt.get("name") == name: return wt return None def _validate_name(self, name: str): if not re.fullmatch(r"[A-Za-z0-9._-]{1,40}", name or ""): raise ValueError( "Invalid worktree name. Use 1-40 chars: letters, numbers, ., _, -" ) def create(self, name: str, task_id: int = None, base_ref: str = "HEAD") -> str: self._validate_name(name) if self._find(name): raise ValueError(f"Worktree '{name}' already exists in index") if task_id is not None and not self.tasks.exists(task_id): raise ValueError(f"Task {task_id} not found") path = self.dir / name branch = f"wt/{name}" self.events.emit( "worktree.create.before", task={"id": task_id} if task_id is not None else {}, worktree={"name": name, "base_ref": base_ref}, ) try: self._run_git(["worktree", "add", "-b", branch, str(path), base_ref]) entry = { "name": name, "path": str(path), "branch": branch, "task_id": task_id, "status": "active", "created_at": time.time(), } idx = self._load_index() idx["worktrees"].append(entry) self._save_index(idx) if task_id is not None: self.tasks.bind_worktree(task_id, name) self.events.emit( "worktree.create.after", task={"id": task_id} if task_id is not None else {}, worktree={ "name": name, "path": str(path), "branch": branch, "status": "active", }, ) return json.dumps(entry, indent=2) except Exception as e: self.events.emit( "worktree.create.failed", task={"id": task_id} if task_id is not None else {}, worktree={"name": 
name, "base_ref": base_ref}, error=str(e), ) raise def list_all(self) -> str: idx = self._load_index() wts = idx.get("worktrees", []) if not wts: return "No worktrees in index." lines = [] for wt in wts: suffix = f" task={wt['task_id']}" if wt.get("task_id") else "" lines.append( f"[{wt.get('status', 'unknown')}] {wt['name']} -> " f"{wt['path']} ({wt.get('branch', '-')}){suffix}" ) return "\n".join(lines) def status(self, name: str) -> str: wt = self._find(name) if not wt: return f"Error: Unknown worktree '{name}'" path = Path(wt["path"]) if not path.exists(): return f"Error: Worktree path missing: {path}" r = subprocess.run( ["git", "status", "--short", "--branch"], cwd=path, capture_output=True, text=True, timeout=60, ) text = (r.stdout + r.stderr).strip() return text or "Clean worktree" def run(self, name: str, command: str) -> str: dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] if any(d in command for d in dangerous): return "Error: Dangerous command blocked" wt = self._find(name) if not wt: return f"Error: Unknown worktree '{name}'" path = Path(wt["path"]) if not path.exists(): return f"Error: Worktree path missing: {path}" try: r = subprocess.run( command, shell=True, cwd=path, capture_output=True, text=True, timeout=300, ) out = (r.stdout + r.stderr).strip() return out[:50000] if out else "(no output)" except subprocess.TimeoutExpired: return "Error: Timeout (300s)" def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str: wt = self._find(name) if not wt: return f"Error: Unknown worktree '{name}'" self.events.emit( "worktree.remove.before", task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, worktree={"name": name, "path": wt.get("path")}, ) try: args = ["worktree", "remove"] if force: args.append("--force") args.append(wt["path"]) self._run_git(args) if complete_task and wt.get("task_id") is not None: task_id = wt["task_id"] before = json.loads(self.tasks.get(task_id)) 
self.tasks.update(task_id, status="completed") self.tasks.unbind_worktree(task_id) self.events.emit( "task.completed", task={ "id": task_id, "subject": before.get("subject", ""), "status": "completed", }, worktree={"name": name}, ) idx = self._load_index() for item in idx.get("worktrees", []): if item.get("name") == name: item["status"] = "removed" item["removed_at"] = time.time() self._save_index(idx) self.events.emit( "worktree.remove.after", task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, worktree={"name": name, "path": wt.get("path"), "status": "removed"}, ) return f"Removed worktree '{name}'" except Exception as e: self.events.emit( "worktree.remove.failed", task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, worktree={"name": name, "path": wt.get("path")}, error=str(e), ) raise def keep(self, name: str) -> str: wt = self._find(name) if not wt: return f"Error: Unknown worktree '{name}'" idx = self._load_index() kept = None for item in idx.get("worktrees", []): if item.get("name") == name: item["status"] = "kept" item["kept_at"] = time.time() kept = item self._save_index(idx) self.events.emit( "worktree.keep", task={"id": wt.get("task_id")} if wt.get("task_id") is not None else {}, worktree={ "name": name, "path": wt.get("path"), "status": "kept", }, ) return json.dumps(kept, indent=2) if kept else f"Error: Unknown worktree '{name}'" WORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS) # -- Base tools (kept minimal, same style as previous sessions) -- def safe_path(p: str) -> Path: path = (WORKDIR / p).resolve() if not path.is_relative_to(WORKDIR): raise ValueError(f"Path escapes workspace: {p}") return path def run_bash(command: str) -> str: dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"] if any(d in command for d in dangerous): return "Error: Dangerous command blocked" try: r = subprocess.run( command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=120, ) out = (r.stdout + 
def run_read(path: str, limit: int = None) -> str:
    """Read a text file from the workspace, optionally showing only its first lines.

    Args:
        path: Path passed to ``safe_path`` for resolution and validation.
        limit: When truthy and smaller than the number of lines, only that
            many lines are returned followed by a ``... (N more)`` marker.

    Returns:
        The (possibly shortened) text joined with newlines and cut to 50000
        characters, or ``Error: <message>`` if anything raised.
    """
    try:
        all_lines = safe_path(path).read_text().splitlines()
        total = len(all_lines)
        if limit and limit < total:
            shown = all_lines[:limit]
            shown.append(f"... ({total - limit} more)")
        else:
            shown = all_lines
        return "\n".join(shown)[:50000]
    except Exception as exc:
        return f"Error: {exc}"
False), kw.get("complete_task", False)), "worktree_events": lambda **kw: EVENTS.list_recent(kw.get("limit", 20)), } TOOLS = [ { "name": "bash", "description": "Run a shell command in the current workspace (blocking).", "input_schema": { "type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"], }, }, { "name": "read_file", "description": "Read file contents.", "input_schema": { "type": "object", "properties": { "path": {"type": "string"}, "limit": {"type": "integer"}, }, "required": ["path"], }, }, { "name": "write_file", "description": "Write content to file.", "input_schema": { "type": "object", "properties": { "path": {"type": "string"}, "content": {"type": "string"}, }, "required": ["path", "content"], }, }, { "name": "edit_file", "description": "Replace exact text in file.", "input_schema": { "type": "object", "properties": { "path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}, }, "required": ["path", "old_text", "new_text"], }, }, { "name": "task_create", "description": "Create a new task on the shared task board.", "input_schema": { "type": "object", "properties": { "subject": {"type": "string"}, "description": {"type": "string"}, }, "required": ["subject"], }, }, { "name": "task_list", "description": "List all tasks with status, owner, and worktree binding.", "input_schema": {"type": "object", "properties": {}}, }, { "name": "task_get", "description": "Get task details by ID.", "input_schema": { "type": "object", "properties": {"task_id": {"type": "integer"}}, "required": ["task_id"], }, }, { "name": "task_update", "description": "Update task status or owner.", "input_schema": { "type": "object", "properties": { "task_id": {"type": "integer"}, "status": { "type": "string", "enum": ["pending", "in_progress", "completed"], }, "owner": {"type": "string"}, }, "required": ["task_id"], }, }, { "name": "task_bind_worktree", "description": "Bind a task to a worktree name.", "input_schema": { 
def agent_loop(messages: list):
    """Drive the model until it stops asking for tools.

    Each round sends ``messages`` to the model, appends the assistant reply,
    and, while the reply's stop reason is ``tool_use``, executes every
    requested tool and appends the results as the next user message.
    ``messages`` is mutated in place; nothing is returned.
    """
    while True:
        reply = client.messages.create(
            model=MODEL,
            system=SYSTEM,
            messages=messages,
            tools=TOOLS,
            max_tokens=8000,
        )
        messages.append({"role": "assistant", "content": reply.content})
        if reply.stop_reason != "tool_use":
            return

        tool_results = []
        for part in reply.content:
            if part.type != "tool_use":
                continue
            tool_fn = TOOL_HANDLERS.get(part.name)
            try:
                if tool_fn:
                    outcome = tool_fn(**part.input)
                else:
                    outcome = f"Unknown tool: {part.name}"
            except Exception as exc:
                # Tool failures are reported back to the model, not raised.
                outcome = f"Error: {exc}"
            print(f"> {part.name}: {str(outcome)[:200]}")
            tool_results.append(
                {
                    "type": "tool_result",
                    "tool_use_id": part.id,
                    "content": str(outcome),
                }
            )
        messages.append({"role": "user", "content": tool_results})
+------------------------------------------------------------------+ | FULL AGENT | | | | System prompt (s05 skills, task-first + optional todo nag) | | | | Before each LLM call: | | +--------------------+ +------------------+ +--------------+ | | | Microcompact (s06) | | Drain bg (s08) | | Check inbox | | | | Auto-compact (s06) | | notifications | | (s09) | | | +--------------------+ +------------------+ +--------------+ | | | | Tool dispatch (s02 pattern): | | +--------+----------+----------+---------+-----------+ | | | bash | read | write | edit | TodoWrite | | | | task | load_sk | compress | bg_run | bg_check | | | | t_crt | t_get | t_upd | t_list | spawn_tm | | | | list_tm| send_msg | rd_inbox | bcast | shutdown | | | | plan | idle | claim | | | | | +--------+----------+----------+---------+-----------+ | | | | Subagent (s04): spawn -> work -> return summary | | Teammate (s09): spawn -> work -> idle -> auto-claim (s11) | | Shutdown (s10): request_id handshake | | Plan gate (s10): submit -> approve/reject | +------------------------------------------------------------------+ REPL commands: /compact /tasks /team /inbox """ import json import os import re import subprocess import threading import time import uuid from pathlib import Path from queue import Queue from anthropic import Anthropic from dotenv import load_dotenv load_dotenv(override=True) if os.getenv("ANTHROPIC_BASE_URL"): os.environ.pop("ANTHROPIC_AUTH_TOKEN", None) WORKDIR = Path.cwd() client = Anthropic(base_url=os.getenv("ANTHROPIC_BASE_URL")) MODEL = os.environ["MODEL_ID"] TEAM_DIR = WORKDIR / ".team" INBOX_DIR = TEAM_DIR / "inbox" TASKS_DIR = WORKDIR / ".tasks" SKILLS_DIR = WORKDIR / "skills" TRANSCRIPT_DIR = WORKDIR / ".transcripts" TOKEN_THRESHOLD = 100000 POLL_INTERVAL = 5 IDLE_TIMEOUT = 60 VALID_MSG_TYPES = {"message", "broadcast", "shutdown_request", "shutdown_response", "plan_approval_response"} # === SECTION: base_tools === def safe_path(p: str) -> Path: path = (WORKDIR / p).resolve() if 
class TodoManager:
    """In-memory todo list with validation and a plain-text rendering."""

    _STATUSES = ("pending", "in_progress", "completed")
    _MARKERS = {"completed": "[x]", "in_progress": "[>]", "pending": "[ ]"}

    def __init__(self):
        self.items = []

    @classmethod
    def _normalize(cls, position: int, raw) -> dict:
        """Validate one raw item and return its cleaned-up form.

        Checks run in a fixed order: content, then status, then activeForm.
        """
        text = str(raw.get("content", "")).strip()
        state = str(raw.get("status", "pending")).lower()
        active_form = str(raw.get("activeForm", "")).strip()
        if not text:
            raise ValueError(f"Item {position}: content required")
        if state not in cls._STATUSES:
            raise ValueError(f"Item {position}: invalid status '{state}'")
        if not active_form:
            raise ValueError(f"Item {position}: activeForm required")
        return {"content": text, "status": state, "activeForm": active_form}

    def update(self, items: list) -> str:
        """Replace the whole list with ``items`` and return the rendering.

        Raises:
            ValueError: An item is invalid, there are more than 20 items, or
                more than one item is ``in_progress``. The stored list is left
                untouched in that case.
        """
        cleaned = [self._normalize(pos, raw) for pos, raw in enumerate(items)]
        if len(cleaned) > 20:
            raise ValueError("Max 20 todos")
        active = sum(1 for entry in cleaned if entry["status"] == "in_progress")
        if active > 1:
            raise ValueError("Only one in_progress allowed")
        self.items = cleaned
        return self.render()

    def render(self) -> str:
        """Return one line per todo plus a ``(done/total completed)`` footer."""
        if not self.items:
            return "No todos."
        rows = []
        finished = 0
        for entry in self.items:
            state = entry["status"]
            row = f"{self._MARKERS.get(state, '[?]')} {entry['content']}"
            if state == "in_progress":
                row += f" <- {entry['activeForm']}"
            elif state == "completed":
                finished += 1
            rows.append(row)
        rows.append(f"\n({finished}/{len(self.items)} completed)")
        return "\n".join(rows)

    def has_open_items(self) -> bool:
        """True when at least one stored item is not ``completed``."""
        for entry in self.items:
            if entry.get("status") != "completed":
                return True
        return False
class SkillLoader:
    """Index of ``SKILL.md`` files found anywhere below a skills directory.

    Each file may start with a ``---`` delimited header of ``key: value``
    lines. The skill is registered under the header's ``name`` value, falling
    back to the name of the directory containing the file.
    """

    _FRONT_MATTER = re.compile(r"^---\n(.*?)\n---\n(.*)", re.DOTALL)

    def __init__(self, skills_dir: Path):
        self.skills = {}
        if not skills_dir.exists():
            return
        for skill_file in sorted(skills_dir.rglob("SKILL.md")):
            meta, body = self._parse(skill_file.read_text())
            key = meta.get("name", skill_file.parent.name)
            self.skills[key] = {"meta": meta, "body": body}

    @classmethod
    def _parse(cls, text: str):
        """Split ``text`` into (header dict, body); no header means ({}, text)."""
        found = cls._FRONT_MATTER.match(text)
        if not found:
            return {}, text
        header = {}
        for row in found.group(1).strip().splitlines():
            if ":" not in row:
                continue
            # Only the first colon separates key from value.
            field, _, value = row.partition(":")
            header[field.strip()] = value.strip()
        return header, found.group(2).strip()

    def descriptions(self) -> str:
        """One ``- name: description`` line per skill, or ``(no skills)``."""
        if not self.skills:
            return "(no skills)"
        rows = []
        for skill_name, info in self.skills.items():
            rows.append(f" - {skill_name}: {info['meta'].get('description', '-')}")
        return "\n".join(rows)

    def load(self, name: str) -> str:
        """Return the body of skill ``name``, or an error listing known skills."""
        entry = self.skills.get(name)
        if entry:
            return f"\n{entry['body']}\n"
        return f"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}"
Continuing with summary context."}, ] # === SECTION: file_tasks (s07) === class TaskManager: def __init__(self): TASKS_DIR.mkdir(exist_ok=True) def _next_id(self) -> int: ids = [int(f.stem.split("_")[1]) for f in TASKS_DIR.glob("task_*.json")] return max(ids, default=0) + 1 def _load(self, tid: int) -> dict: p = TASKS_DIR / f"task_{tid}.json" if not p.exists(): raise ValueError(f"Task {tid} not found") return json.loads(p.read_text()) def _save(self, task: dict): (TASKS_DIR / f"task_{task['id']}.json").write_text(json.dumps(task, indent=2)) def create(self, subject: str, description: str = "") -> str: task = {"id": self._next_id(), "subject": subject, "description": description, "status": "pending", "owner": None, "blockedBy": [], "blocks": []} self._save(task) return json.dumps(task, indent=2) def get(self, tid: int) -> str: return json.dumps(self._load(tid), indent=2) def update(self, tid: int, status: str = None, add_blocked_by: list = None, add_blocks: list = None) -> str: task = self._load(tid) if status: task["status"] = status if status == "completed": for f in TASKS_DIR.glob("task_*.json"): t = json.loads(f.read_text()) if tid in t.get("blockedBy", []): t["blockedBy"].remove(tid) self._save(t) if status == "deleted": (TASKS_DIR / f"task_{tid}.json").unlink(missing_ok=True) return f"Task {tid} deleted" if add_blocked_by: task["blockedBy"] = list(set(task["blockedBy"] + add_blocked_by)) if add_blocks: task["blocks"] = list(set(task["blocks"] + add_blocks)) self._save(task) return json.dumps(task, indent=2) def list_all(self) -> str: tasks = [json.loads(f.read_text()) for f in sorted(TASKS_DIR.glob("task_*.json"))] if not tasks: return "No tasks." 
class BackgroundManager:
    """Run shell commands on daemon threads and collect completion notices.

    ``tasks`` maps a short task ID to ``{"status", "command", "result"}``;
    ``notifications`` is a queue of completion records that ``drain`` empties.
    """

    def __init__(self):
        self.tasks = {}
        self.notifications = Queue()

    def run(self, command: str, timeout: int = 120) -> str:
        """Start ``command`` on a daemon thread and return immediately.

        Args:
            command: Shell command line to execute.
            timeout: Seconds passed to ``subprocess.run`` as its timeout.

        Returns:
            A confirmation string containing the new task ID (the first 8
            characters of a UUID4) and the first 80 characters of the command.
        """
        tid = str(uuid.uuid4())[:8]
        # ``result`` stays None until the worker thread finishes.
        self.tasks[tid] = {"status": "running", "command": command, "result": None}
        threading.Thread(target=self._exec, args=(tid, command, timeout), daemon=True).start()
        return f"Background task {tid} started: {command[:80]}"

    def _exec(self, tid: str, command: str, timeout: int):
        """Worker body: run the command, store its outcome, queue a notification."""
        try:
            r = subprocess.run(command, shell=True, cwd=WORKDIR,
                               capture_output=True, text=True, timeout=timeout)
            output = (r.stdout + r.stderr).strip()[:50000]
            self.tasks[tid].update({"status": "completed", "result": output or "(no output)"})
        except Exception as e:
            self.tasks[tid].update({"status": "error", "result": str(e)})
        # Notifications carry at most the first 500 characters of the result.
        self.notifications.put({"task_id": tid, "status": self.tasks[tid]["status"],
                                "result": self.tasks[tid]["result"][:500]})

    def check(self, tid: str = None) -> str:
        """Describe one task, or list all tasks when ``tid`` is falsy.

        Returns:
            ``[status] result`` for a known task, with ``(running)`` standing
            in for a result that is not available yet; ``Unknown: <tid>`` for
            an unknown ID; otherwise one ``id: [status] command`` line per
            task, or ``No bg tasks.`` when there are none.
        """
        if tid:
            t = self.tasks.get(tid)
            if not t:
                return f"Unknown: {tid}"
            # ``run`` always stores the key with a None value, so a dict
            # default would never apply; test the value itself.
            result = t.get("result")
            shown = result if result is not None else "(running)"
            return f"[{t['status']}] {shown}"
        return "\n".join(f"{k}: [{v['status']}] {v['command'][:60]}" for k, v in self.tasks.items()) or "No bg tasks."

    def drain(self) -> list:
        """Remove and return every notification currently queued."""
        notifs = []
        while not self.notifications.empty():
            notifs.append(self.notifications.get_nowait())
        return notifs
def _loop(self, name: str, role: str, prompt: str):
    """Thread body for one teammate (a TeammateManager method).

    Alternates between two phases until the teammate shuts down:

    * Work phase: up to 50 LLM rounds. Each round first drains the inbox,
      then calls the model and executes the requested tools. The phase ends
      when the model stops calling tools or calls ``idle``.
    * Idle phase: polls every ``POLL_INTERVAL`` seconds, for
      ``IDLE_TIMEOUT // max(POLL_INTERVAL, 1)`` iterations, for inbox
      messages or a pending, unowned, unblocked task to auto-claim.

    The teammate's status becomes ``"shutdown"`` and the thread returns on a
    ``shutdown_request`` message, on an API failure, or when the idle phase
    times out without new work.

    Args:
        name: teammate name, used for the inbox and as task owner.
        role: free-text role, included in the system prompt.
        prompt: first user message of the conversation.
    """
    team_name = self.config["team_name"]
    sys_prompt = (f"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. "
                  f"Use idle when done with current work. You may auto-claim tasks.")
    messages = [{"role": "user", "content": prompt}]
    tools = [
        {"name": "bash", "description": "Run command.",
         "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}},
        {"name": "read_file", "description": "Read file.",
         "input_schema": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]}},
        {"name": "write_file", "description": "Write file.",
         "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}},
        {"name": "edit_file", "description": "Edit file.",
         "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}},
        {"name": "send_message", "description": "Send message.",
         "input_schema": {"type": "object", "properties": {"to": {"type": "string"}, "content": {"type": "string"}}, "required": ["to", "content"]}},
        {"name": "idle", "description": "Signal no more work.",
         "input_schema": {"type": "object", "properties": {}}},
        {"name": "claim_task", "description": "Claim task by ID.",
         "input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}}, "required": ["task_id"]}},
    ]
    # File/shell tools shared with the lead; built once instead of per tool call.
    dispatch = {
        "bash": lambda **kw: run_bash(kw["command"]),
        "read_file": lambda **kw: run_read(kw["path"]),
        "write_file": lambda **kw: run_write(kw["path"], kw["content"]),
        "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]),
    }
    while True:
        # -- WORK PHASE --
        for _ in range(50):
            inbox = self.bus.read_inbox(name)
            for msg in inbox:
                if msg.get("type") == "shutdown_request":
                    self._set_status(name, "shutdown")
                    return
                messages.append({"role": "user", "content": json.dumps(msg)})
            try:
                response = client.messages.create(
                    model=MODEL, system=sys_prompt, messages=messages,
                    tools=tools, max_tokens=8000)
            except Exception as e:
                # Report why the teammate stopped instead of vanishing silently.
                print(f" [{name}] API error: {e}")
                self._set_status(name, "shutdown")
                return
            messages.append({"role": "assistant", "content": response.content})
            if response.stop_reason != "tool_use":
                break
            results = []
            idle_requested = False
            for block in response.content:
                if block.type == "tool_use":
                    # A failing tool (missing task, bad arguments, I/O error) is
                    # reported back to the model, as the lead's loop does, rather
                    # than killing this thread with status stuck at "working".
                    try:
                        if block.name == "idle":
                            idle_requested = True
                            output = "Entering idle phase."
                        elif block.name == "claim_task":
                            output = self.task_mgr.claim(block.input["task_id"], name)
                        elif block.name == "send_message":
                            output = self.bus.send(name, block.input["to"], block.input["content"])
                        else:
                            output = dispatch.get(block.name, lambda **kw: "Unknown")(**block.input)
                    except Exception as e:
                        output = f"Error: {e}"
                    print(f" [{name}] {block.name}: {str(output)[:120]}")
                    results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
            messages.append({"role": "user", "content": results})
            if idle_requested:
                break
        # -- IDLE PHASE: poll for messages and unclaimed tasks --
        self._set_status(name, "idle")
        resume = False
        for _ in range(IDLE_TIMEOUT // max(POLL_INTERVAL, 1)):
            time.sleep(POLL_INTERVAL)
            inbox = self.bus.read_inbox(name)
            if inbox:
                for msg in inbox:
                    if msg.get("type") == "shutdown_request":
                        self._set_status(name, "shutdown")
                        return
                    messages.append({"role": "user", "content": json.dumps(msg)})
                resume = True
                break
            unclaimed = []
            for f in sorted(TASKS_DIR.glob("task_*.json")):
                t = json.loads(f.read_text())
                if t.get("status") == "pending" and not t.get("owner") and not t.get("blockedBy"):
                    unclaimed.append(t)
            if unclaimed:
                task = unclaimed[0]
                self.task_mgr.claim(task["id"], name)
                # Identity re-injection for compressed contexts
                if len(messages) <= 3:
                    messages.insert(0, {"role": "user", "content": f"You are '{name}', role: {role}, team: {team_name}."})
                    messages.insert(1, {"role": "assistant", "content": f"I am {name}. Continuing."})
                messages.append({"role": "user", "content": f"Task #{task['id']}: {task['subject']}\n{task.get('description', '')}"})
                messages.append({"role": "assistant", "content": f"Claimed task #{task['id']}. Working on it."})
                resume = True
                break
        if not resume:
            self._set_status(name, "shutdown")
            return
        self._set_status(name, "working")
# === SECTION: shutdown_protocol (s10) ===
def handle_shutdown_request(teammate: str) -> str:
    """Ask ``teammate`` to shut down and track the request.

    Records a pending entry in ``shutdown_requests`` under a fresh 8-character
    id, then sends a ``shutdown_request`` message from the lead carrying it.
    """
    request_id = str(uuid.uuid4())[:8]
    shutdown_requests[request_id] = {"target": teammate, "status": "pending"}
    payload = {"request_id": request_id}
    BUS.send("lead", teammate, "Please shut down.", "shutdown_request", payload)
    return f"Shutdown request {request_id} sent to '{teammate}'"


# === SECTION: plan_approval (s10) ===
def handle_plan_review(request_id: str, approve: bool, feedback: str = "") -> str:
    """Approve or reject a tracked plan and notify the teammate who filed it.

    Returns an error string when ``request_id`` is not in ``plan_requests``;
    otherwise stores the verdict on the request and sends a
    ``plan_approval_response`` message with the decision and feedback.
    """
    pending = plan_requests.get(request_id)
    if not pending:
        return f"Error: Unknown plan request_id '{request_id}'"
    if approve:
        verdict = "approved"
    else:
        verdict = "rejected"
    pending["status"] = verdict
    author = pending["from"]
    details = {"request_id": request_id, "approve": approve, "feedback": feedback}
    BUS.send("lead", author, feedback, "plan_approval_response", details)
    return f"Plan {verdict} for '{author}'"
BUS.send("lead", kw["to"], kw["content"], kw.get("msg_type", "message")), "read_inbox": lambda **kw: json.dumps(BUS.read_inbox("lead"), indent=2), "broadcast": lambda **kw: BUS.broadcast("lead", kw["content"], TEAM.member_names()), "shutdown_request": lambda **kw: handle_shutdown_request(kw["teammate"]), "plan_approval": lambda **kw: handle_plan_review(kw["request_id"], kw["approve"], kw.get("feedback", "")), "idle": lambda **kw: "Lead does not idle.", "claim_task": lambda **kw: TASK_MGR.claim(kw["task_id"], "lead"), } TOOLS = [ {"name": "bash", "description": "Run a shell command.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}}, "required": ["command"]}}, {"name": "read_file", "description": "Read file contents.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "limit": {"type": "integer"}}, "required": ["path"]}}, {"name": "write_file", "description": "Write content to file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "content": {"type": "string"}}, "required": ["path", "content"]}}, {"name": "edit_file", "description": "Replace exact text in file.", "input_schema": {"type": "object", "properties": {"path": {"type": "string"}, "old_text": {"type": "string"}, "new_text": {"type": "string"}}, "required": ["path", "old_text", "new_text"]}}, {"name": "TodoWrite", "description": "Update task tracking list.", "input_schema": {"type": "object", "properties": {"items": {"type": "array", "items": {"type": "object", "properties": {"content": {"type": "string"}, "status": {"type": "string", "enum": ["pending", "in_progress", "completed"]}, "activeForm": {"type": "string"}}, "required": ["content", "status", "activeForm"]}}}, "required": ["items"]}}, {"name": "task", "description": "Spawn a subagent for isolated exploration or work.", "input_schema": {"type": "object", "properties": {"prompt": {"type": "string"}, "agent_type": {"type": "string", "enum": ["Explore", 
"general-purpose"]}}, "required": ["prompt"]}}, {"name": "load_skill", "description": "Load specialized knowledge by name.", "input_schema": {"type": "object", "properties": {"name": {"type": "string"}}, "required": ["name"]}}, {"name": "compress", "description": "Manually compress conversation context.", "input_schema": {"type": "object", "properties": {}}}, {"name": "background_run", "description": "Run command in background thread.", "input_schema": {"type": "object", "properties": {"command": {"type": "string"}, "timeout": {"type": "integer"}}, "required": ["command"]}}, {"name": "check_background", "description": "Check background task status.", "input_schema": {"type": "object", "properties": {"task_id": {"type": "string"}}}}, {"name": "task_create", "description": "Create a persistent file task.", "input_schema": {"type": "object", "properties": {"subject": {"type": "string"}, "description": {"type": "string"}}, "required": ["subject"]}}, {"name": "task_get", "description": "Get task details by ID.", "input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}}, "required": ["task_id"]}}, {"name": "task_update", "description": "Update task status or dependencies.", "input_schema": {"type": "object", "properties": {"task_id": {"type": "integer"}, "status": {"type": "string", "enum": ["pending", "in_progress", "completed", "deleted"]}, "add_blocked_by": {"type": "array", "items": {"type": "integer"}}, "add_blocks": {"type": "array", "items": {"type": "integer"}}}, "required": ["task_id"]}}, {"name": "task_list", "description": "List all tasks.", "input_schema": {"type": "object", "properties": {}}}, {"name": "spawn_teammate", "description": "Spawn a persistent autonomous teammate.", "input_schema": {"type": "object", "properties": {"name": {"type": "string"}, "role": {"type": "string"}, "prompt": {"type": "string"}}, "required": ["name", "role", "prompt"]}}, {"name": "list_teammates", "description": "List all teammates.", "input_schema": 
# === SECTION: agent_loop ===
def agent_loop(messages: list):
    """Run the lead agent until the model stops requesting tools.

    Each iteration:
      1. compacts the context (micro-compaction every turn, automatic
         summarization when the token estimate exceeds TOKEN_THRESHOLD),
      2. appends finished background-task results and lead inbox messages,
      3. calls the model and appends its reply,
      4. executes every requested tool and appends the results as one user
         message, adding a todo reminder after three rounds without
         ``TodoWrite`` while open todo items exist,
      5. summarizes the conversation if the model called ``compress``.

    Args:
        messages: conversation history; mutated in place.
    """
    rounds_without_todo = 0
    while True:
        # s06: compression pipeline
        microcompact(messages)
        if estimate_tokens(messages) > TOKEN_THRESHOLD:
            print("[auto-compact triggered]")
            messages[:] = auto_compact(messages)
        # s08: drain background notifications
        # NOTE(review): this and the inbox branch leave an assistant message
        # last, so the next request continues that turn -- confirm intended.
        notifs = BG.drain()
        if notifs:
            txt = "\n".join(f"[bg:{n['task_id']}] {n['status']}: {n['result']}" for n in notifs)
            messages.append({"role": "user", "content": f"\n{txt}\n"})
            messages.append({"role": "assistant", "content": "Noted background results."})
        # s10: check lead inbox
        inbox = BUS.read_inbox("lead")
        if inbox:
            messages.append({"role": "user", "content": f"{json.dumps(inbox, indent=2)}"})
            messages.append({"role": "assistant", "content": "Noted inbox messages."})
        # LLM call
        response = client.messages.create(
            model=MODEL, system=SYSTEM, messages=messages,
            tools=TOOLS, max_tokens=8000,
        )
        messages.append({"role": "assistant", "content": response.content})
        if response.stop_reason != "tool_use":
            return
        # Tool execution
        results = []
        used_todo = False
        manual_compress = False
        for block in response.content:
            if block.type == "tool_use":
                if block.name == "compress":
                    manual_compress = True
                handler = TOOL_HANDLERS.get(block.name)
                try:
                    output = handler(**block.input) if handler else f"Unknown tool: {block.name}"
                except Exception as e:
                    # Surface tool failures to the model instead of aborting the loop.
                    output = f"Error: {e}"
                print(f"> {block.name}: {str(output)[:200]}")
                results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)})
                if block.name == "TodoWrite":
                    used_todo = True
        # s03: nag reminder (only when todo workflow is active)
        rounds_without_todo = 0 if used_todo else rounds_without_todo + 1
        if TODO.has_open_items() and rounds_without_todo >= 3:
            # The Messages API requires tool_result blocks to come first in the
            # user message, so the reminder goes after them, not at index 0.
            results.append({"type": "text", "text": "Update your todos."})
        messages.append({"role": "user", "content": results})
        # s06: manual compress
        if manual_compress:
            print("[manual compact]")
            messages[:] = auto_compact(messages)


# === SECTION: repl ===
if __name__ == "__main__":
    # Interactive shell: slash commands are handled locally, anything else is
    # sent to the agent. "q", "exit", an empty line, Ctrl-D or Ctrl-C quits.
    history = []
    while True:
        try:
            query = input("\033[36ms_full >> \033[0m")
        except (EOFError, KeyboardInterrupt):
            break
        if query.strip().lower() in ("q", "exit", ""):
            break
        if query.strip() == "/compact":
            if history:
                print("[manual compact via /compact]")
                history[:] = auto_compact(history)
            continue
        if query.strip() == "/tasks":
            print(TASK_MGR.list_all())
            continue
        if query.strip() == "/team":
            print(TEAM.list_all())
            continue
        if query.strip() == "/inbox":
            print(json.dumps(BUS.read_inbox("lead"), indent=2))
            continue
        history.append({"role": "user", "content": query})
        agent_loop(history)
        print()
================================================ FILE: docs/en/s01-the-agent-loop.md ================================================ # s01: The Agent Loop `[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"One loop & Bash is all you need"* -- one tool + one loop = an agent. > > **Harness layer**: The loop -- the model's first connection to the real world. ## Problem A language model can reason about code, but it can't *touch* the real world -- can't read files, run tests, or check errors. Without a loop, every tool call requires you to manually copy-paste results back. You become the loop. ## Solution ``` +--------+ +-------+ +---------+ | User | ---> | LLM | ---> | Tool | | prompt | | | | execute | +--------+ +---+---+ +----+----+ ^ | | tool_result | +----------------+ (loop until stop_reason != "tool_use") ``` One exit condition controls the entire flow. The loop runs until the model stops calling tools. ## How It Works 1. User prompt becomes the first message. ```python messages.append({"role": "user", "content": query}) ``` 2. Send messages + tool definitions to the LLM. ```python response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000, ) ``` 3. Append the assistant response. Check `stop_reason` -- if the model didn't call a tool, we're done. ```python messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return ``` 4. Execute each tool call, collect results, append as a user message. Loop back to step 2. 
```python results = [] for block in response.content: if block.type == "tool_use": output = run_bash(block.input["command"]) results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) messages.append({"role": "user", "content": results}) ``` Assembled into one function: ```python def agent_loop(query): messages = [{"role": "user", "content": query}] while True: response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] for block in response.content: if block.type == "tool_use": output = run_bash(block.input["command"]) results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) messages.append({"role": "user", "content": results}) ``` That's the entire agent in under 30 lines. Everything else in this course layers on top -- without changing the loop. ## What Changed | Component | Before | After | |---------------|------------|--------------------------------| | Agent loop | (none) | `while True` + stop_reason | | Tools | (none) | `bash` (one tool) | | Messages | (none) | Accumulating list | | Control flow | (none) | `stop_reason != "tool_use"` | ## Try It ```sh cd learn-claude-code python agents/s01_agent_loop.py ``` 1. `Create a file called hello.py that prints "Hello, World!"` 2. `List all Python files in this directory` 3. `What is the current git branch?` 4. `Create a directory called test_output and write 3 files in it` ================================================ FILE: docs/en/s02-tool-use.md ================================================ # s02: Tool Use `s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"Adding a tool means adding one handler"* -- the loop stays the same; new tools register into the dispatch map. > > **Harness layer**: Tool dispatch -- expanding what the model can reach. 
## Problem With only `bash`, the agent shells out for everything. `cat` truncates unpredictably, `sed` fails on special characters, and every bash call is an unconstrained security surface. Dedicated tools like `read_file` and `write_file` let you enforce path sandboxing at the tool level. The key insight: adding tools does not require changing the loop. ## Solution ``` +--------+ +-------+ +------------------+ | User | ---> | LLM | ---> | Tool Dispatch | | prompt | | | | { | +--------+ +---+---+ | bash: run_bash | ^ | read: run_read | | | write: run_wr | +-----------+ edit: run_edit | tool_result | } | +------------------+ The dispatch map is a dict: {tool_name: handler_function}. One lookup replaces any if/elif chain. ``` ## How It Works 1. Each tool gets a handler function. Path sandboxing prevents workspace escape. ```python def safe_path(p: str) -> Path: path = (WORKDIR / p).resolve() if not path.is_relative_to(WORKDIR): raise ValueError(f"Path escapes workspace: {p}") return path def run_read(path: str, limit: int = None) -> str: text = safe_path(path).read_text() lines = text.splitlines() if limit and limit < len(lines): lines = lines[:limit] return "\n".join(lines)[:50000] ``` 2. The dispatch map links tool names to handlers. ```python TOOL_HANDLERS = { "bash": lambda **kw: run_bash(kw["command"]), "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), "write_file": lambda **kw: run_write(kw["path"], kw["content"]), "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), } ``` 3. In the loop, look up the handler by name. The loop body itself is unchanged from s01. ```python for block in response.content: if block.type == "tool_use": handler = TOOL_HANDLERS.get(block.name) output = handler(**block.input) if handler \ else f"Unknown tool: {block.name}" results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) ``` Add a tool = add a handler + add a schema entry. The loop never changes. 
## What Changed From s01 | Component | Before (s01) | After (s02) | |----------------|--------------------|----------------------------| | Tools | 1 (bash only) | 4 (bash, read, write, edit)| | Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict | | Path safety | None | `safe_path()` sandbox | | Agent loop | Unchanged | Unchanged | ## Try It ```sh cd learn-claude-code python agents/s02_tool_use.py ``` 1. `Read the file requirements.txt` 2. `Create a file called greet.py with a greet(name) function` 3. `Edit greet.py to add a docstring to the function` 4. `Read greet.py to verify the edit worked` ================================================ FILE: docs/en/s03-todo-write.md ================================================ # s03: TodoWrite `s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"An agent without a plan drifts"* -- list the steps first, then execute. > > **Harness layer**: Planning -- keeping the model on course without scripting the route. ## Problem On multi-step tasks, the model loses track. It repeats work, skips steps, or wanders off. Long conversations make this worse -- the system prompt fades as tool results fill the context. A 10-step refactoring might complete steps 1-3, then the model starts improvising because it forgot steps 4-10. ## Solution ``` +--------+ +-------+ +---------+ | User | ---> | LLM | ---> | Tools | | prompt | | | | + todo | +--------+ +---+---+ +----+----+ ^ | | tool_result | +----------------+ | +-----------+-----------+ | TodoManager state | | [ ] task A | | [>] task B <- doing | | [x] task C | +-----------------------+ | if rounds_since_todo >= 3: inject into tool_result ``` ## How It Works 1. TodoManager stores items with statuses. Only one item can be `in_progress` at a time. 
```python class TodoManager: def update(self, items: list) -> str: validated, in_progress_count = [], 0 for item in items: status = item.get("status", "pending") if status == "in_progress": in_progress_count += 1 validated.append({"id": item["id"], "text": item["text"], "status": status}) if in_progress_count > 1: raise ValueError("Only one task can be in_progress") self.items = validated return self.render() ``` 2. The `todo` tool goes into the dispatch map like any other tool. ```python TOOL_HANDLERS = { # ...base tools... "todo": lambda **kw: TODO.update(kw["items"]), } ``` 3. A nag reminder injects a nudge if the model goes 3+ rounds without calling `todo`. ```python if rounds_since_todo >= 3 and messages: last = messages[-1] if last["role"] == "user" and isinstance(last.get("content"), list): last["content"].insert(0, { "type": "text", "text": "Update your todos.", }) ``` The "one in_progress at a time" constraint forces sequential focus. The nag reminder creates accountability. ## What Changed From s02 | Component | Before (s02) | After (s03) | |----------------|------------------|----------------------------| | Tools | 4 | 5 (+todo) | | Planning | None | TodoManager with statuses | | Nag injection | None | Reminder text after 3 rounds| | Agent loop | Simple dispatch | + rounds_since_todo counter| ## Try It ```sh cd learn-claude-code python agents/s03_todo_write.py ``` 1. `Refactor the file hello.py: add type hints, docstrings, and a main guard` 2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py` 3. `Review all Python files and fix any style issues` ================================================ FILE: docs/en/s04-subagent.md ================================================ # s04: Subagents `s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"Break big tasks down; each subtask gets a clean context"* -- subagents use independent messages[], keeping the main conversation clean.
> > **Harness layer**: Context isolation -- protecting the model's clarity of thought. ## Problem As the agent works, its messages array grows. Every file read, every bash output stays in context permanently. "What testing framework does this project use?" might require reading 5 files, but the parent only needs the answer: "pytest." ## Solution ``` Parent agent Subagent +------------------+ +------------------+ | messages=[...] | | messages=[] | <-- fresh | | dispatch | | | tool: task | ----------> | while tool_use: | | prompt="..." | | call tools | | | summary | append results | | result = "..." | <---------- | return last text | +------------------+ +------------------+ Parent context stays clean. Subagent context is discarded. ``` ## How It Works 1. The parent gets a `task` tool. The child gets all base tools except `task` (no recursive spawning). ```python PARENT_TOOLS = CHILD_TOOLS + [ {"name": "task", "description": "Spawn a subagent with fresh context.", "input_schema": { "type": "object", "properties": {"prompt": {"type": "string"}}, "required": ["prompt"], }}, ] ``` 2. The subagent starts with `messages=[]` and runs its own loop. Only the final text returns to the parent. 
```python def run_subagent(prompt: str) -> str: sub_messages = [{"role": "user", "content": prompt}] for _ in range(30): # safety limit response = client.messages.create( model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages, tools=CHILD_TOOLS, max_tokens=8000, ) sub_messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": break results = [] for block in response.content: if block.type == "tool_use": handler = TOOL_HANDLERS.get(block.name) output = handler(**block.input) results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)[:50000]}) sub_messages.append({"role": "user", "content": results}) return "".join( b.text for b in response.content if hasattr(b, "text") ) or "(no summary)" ``` The child's entire message history (possibly 30+ tool calls) is discarded. The parent receives a one-paragraph summary as a normal `tool_result`. ## What Changed From s03 | Component | Before (s03) | After (s04) | |----------------|------------------|---------------------------| | Tools | 5 | 5 (base) + task (parent) | | Context | Single shared | Parent + child isolation | | Subagent | None | `run_subagent()` function | | Return value | N/A | Summary text only | ## Try It ```sh cd learn-claude-code python agents/s04_subagent.py ``` 1. `Use a subtask to find what testing framework this project uses` 2. `Delegate: read all .py files and summarize what each one does` 3. `Use a task to create a new module, then verify it from here` ================================================ FILE: docs/en/s05-skill-loading.md ================================================ # s05: Skills `s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"Load knowledge when you need it, not upfront"* -- inject via tool_result, not the system prompt. > > **Harness layer**: On-demand knowledge -- domain expertise, loaded when the model asks. 
## Problem You want the agent to follow domain-specific workflows: git conventions, testing patterns, code review checklists. Putting everything in the system prompt wastes tokens on unused skills. 10 skills at 2000 tokens each = 20,000 tokens, most of which are irrelevant to any given task. ## Solution ``` System prompt (Layer 1 -- always present): +--------------------------------------+ | You are a coding agent. | | Skills available: | | - git: Git workflow helpers | ~100 tokens/skill | - test: Testing best practices | +--------------------------------------+ When model calls load_skill("git"): +--------------------------------------+ | tool_result (Layer 2 -- on demand): | | | | Full git workflow instructions... | ~2000 tokens | Step 1: ... | | | +--------------------------------------+ ``` Layer 1: skill *names* in system prompt (cheap). Layer 2: full *body* via tool_result (on demand). ## How It Works 1. Each skill is a directory containing a `SKILL.md` with YAML frontmatter. ``` skills/ pdf/ SKILL.md # ---\n name: pdf\n description: Process PDF files\n ---\n ... code-review/ SKILL.md # ---\n name: code-review\n description: Review code\n ---\n ... ``` 2. SkillLoader scans for `SKILL.md` files and uses the frontmatter `name` as the skill identifier, falling back to the directory name when the frontmatter has none. ```python class SkillLoader: def __init__(self, skills_dir: Path): self.skills = {} for f in sorted(skills_dir.rglob("SKILL.md")): text = f.read_text() meta, body = self._parse_frontmatter(text) name = meta.get("name", f.parent.name) self.skills[name] = {"meta": meta, "body": body} def get_descriptions(self) -> str: lines = [] for name, skill in self.skills.items(): desc = skill["meta"].get("description", "") lines.append(f" - {name}: {desc}") return "\n".join(lines) def get_content(self, name: str) -> str: skill = self.skills.get(name) if not skill: return f"Error: Unknown skill '{name}'." return f"\n{skill['body']}\n" ``` 3. Layer 1 goes into the system prompt. Layer 2 is just another tool handler.
```python SYSTEM = f"""You are a coding agent at {WORKDIR}. Skills available: {SKILL_LOADER.get_descriptions()}""" TOOL_HANDLERS = { # ...base tools... "load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]), } ``` The model learns what skills exist (cheap) and loads them when relevant (expensive). ## What Changed From s04 | Component | Before (s04) | After (s05) | |----------------|------------------|----------------------------| | Tools | 5 (base + task) | 5 (base + load_skill) | | System prompt | Static string | + skill descriptions | | Knowledge | None | skills/\*/SKILL.md files | | Injection | None | Two-layer (system + result)| ## Try It ```sh cd learn-claude-code python agents/s05_skill_loading.py ``` 1. `What skills are available?` 2. `Load the agent-builder skill and follow its instructions` 3. `I need to do a code review -- load the relevant skill first` 4. `Build an MCP server using the mcp-builder skill` ================================================ FILE: docs/en/s06-context-compact.md ================================================ # s06: Context Compact `s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12` > *"Context will fill up; you need a way to make room"* -- three-layer compression strategy for infinite sessions. > > **Harness layer**: Compression -- clean memory for infinite sessions. ## Problem The context window is finite. A single `read_file` on a 1000-line file costs ~4000 tokens. After reading 30 files and running 20 bash commands, you hit 100,000+ tokens. The agent cannot work on large codebases without compression. ## Solution Three layers, increasing in aggressiveness: ``` Every turn: +------------------+ | Tool call result | +------------------+ | v [Layer 1: micro_compact] (silent, every turn) Replace tool_result > 3 turns old with "[Previous: used {tool_name}]" | v [Check: tokens > 50000?] 
| | no yes | | v v continue [Layer 2: auto_compact] Save transcript to .transcripts/ LLM summarizes conversation. Replace all messages with [summary]. | v [Layer 3: compact tool] Model calls compact explicitly. Same summarization as auto_compact. ``` ## How It Works 1. **Layer 1 -- micro_compact**: Before each LLM call, replace old tool results with placeholders. ```python def micro_compact(messages: list) -> list: tool_results = [] for i, msg in enumerate(messages): if msg["role"] == "user" and isinstance(msg.get("content"), list): for j, part in enumerate(msg["content"]): if isinstance(part, dict) and part.get("type") == "tool_result": tool_results.append((i, j, part)) if len(tool_results) <= KEEP_RECENT: return messages for _, _, part in tool_results[:-KEEP_RECENT]: if len(part.get("content", "")) > 100: part["content"] = f"[Previous: used {tool_name}]" return messages ``` 2. **Layer 2 -- auto_compact**: When tokens exceed threshold, save full transcript to disk, then ask the LLM to summarize. ```python def auto_compact(messages: list) -> list: # Save transcript for recovery transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl" with open(transcript_path, "w") as f: for msg in messages: f.write(json.dumps(msg, default=str) + "\n") # LLM summarizes response = client.messages.create( model=MODEL, messages=[{"role": "user", "content": "Summarize this conversation for continuity..." + json.dumps(messages, default=str)[:80000]}], max_tokens=2000, ) return [ {"role": "user", "content": f"[Compressed]\n\n{response.content[0].text}"}, {"role": "assistant", "content": "Understood. Continuing."}, ] ``` 3. **Layer 3 -- manual compact**: The `compact` tool triggers the same summarization on demand. 4. The loop integrates all three: ```python def agent_loop(messages: list): while True: micro_compact(messages) # Layer 1 if estimate_tokens(messages) > THRESHOLD: messages[:] = auto_compact(messages) # Layer 2 response = client.messages.create(...) # ... 
tool execution ... if manual_compact: messages[:] = auto_compact(messages) # Layer 3 ``` Transcripts preserve full history on disk. Nothing is truly lost -- just moved out of active context. ## What Changed From s05 | Component | Before (s05) | After (s06) | |----------------|------------------|----------------------------| | Tools | 5 | 5 (base + compact) | | Context mgmt | None | Three-layer compression | | Micro-compact | None | Old results -> placeholders| | Auto-compact | None | Token threshold trigger | | Transcripts | None | Saved to .transcripts/ | ## Try It ```sh cd learn-claude-code python agents/s06_context_compact.py ``` 1. `Read every Python file in the agents/ directory one by one` (watch micro-compact replace old results) 2. `Keep reading files until compression triggers automatically` 3. `Use the compact tool to manually compress the conversation` ================================================ FILE: docs/en/s07-task-system.md ================================================ # s07: Task System `s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12` > *"Break big goals into small tasks, order them, persist to disk"* -- a file-based task graph with dependencies, laying the foundation for multi-agent collaboration. > > **Harness layer**: Persistent tasks -- goals that outlive any single conversation. ## Problem s03's TodoManager is a flat checklist in memory: no ordering, no dependencies, no status beyond done-or-not. Real goals have structure -- task B depends on task A, tasks C and D can run in parallel, task E waits for both C and D. Without explicit relationships, the agent can't tell what's ready, what's blocked, or what can run concurrently. And because the list lives only in memory, context compression (s06) wipes it clean. ## Solution Promote the checklist into a **task graph** persisted to disk. Each task is a JSON file with status, dependencies (`blockedBy`), and dependents (`blocks`). 
The graph answers three questions at any moment: - **What's ready?** -- tasks with `pending` status and empty `blockedBy`. - **What's blocked?** -- tasks waiting on unfinished dependencies. - **What's done?** -- `completed` tasks, whose completion automatically unblocks dependents. ``` .tasks/ task_1.json {"id":1, "status":"completed"} task_2.json {"id":2, "blockedBy":[1], "status":"pending"} task_3.json {"id":3, "blockedBy":[1], "status":"pending"} task_4.json {"id":4, "blockedBy":[2,3], "status":"pending"} Task graph (DAG): +----------+ +--> | task 2 | --+ | | pending | | +----------+ +----------+ +--> +----------+ | task 1 | | task 4 | | completed| --> +----------+ +--> | blocked | +----------+ | task 3 | --+ +----------+ | pending | +----------+ Ordering: task 1 must finish before 2 and 3 Parallelism: tasks 2 and 3 can run at the same time Dependencies: task 4 waits for both 2 and 3 Status: pending -> in_progress -> completed ``` This task graph becomes the coordination backbone for everything after s07: background execution (s08), multi-agent teams (s09+), and worktree isolation (s12) all read from and write to this same structure. ## How It Works 1. **TaskManager**: one JSON file per task, CRUD with dependency graph. ```python class TaskManager: def __init__(self, tasks_dir: Path): self.dir = tasks_dir self.dir.mkdir(exist_ok=True) self._next_id = self._max_id() + 1 def create(self, subject, description=""): task = {"id": self._next_id, "subject": subject, "status": "pending", "blockedBy": [], "blocks": [], "owner": ""} self._save(task) self._next_id += 1 return json.dumps(task, indent=2) ``` 2. **Dependency resolution**: completing a task clears its ID from every other task's `blockedBy` list, automatically unblocking dependents. 
```python def _clear_dependency(self, completed_id): for f in self.dir.glob("task_*.json"): task = json.loads(f.read_text()) if completed_id in task.get("blockedBy", []): task["blockedBy"].remove(completed_id) self._save(task) ``` 3. **Status + dependency wiring**: `update` handles transitions and dependency edges. ```python def update(self, task_id, status=None, add_blocked_by=None, add_blocks=None): task = self._load(task_id) if status: task["status"] = status if status == "completed": self._clear_dependency(task_id) self._save(task) ``` 4. Four task tools go into the dispatch map. ```python TOOL_HANDLERS = { # ...base tools... "task_create": lambda **kw: TASKS.create(kw["subject"]), "task_update": lambda **kw: TASKS.update(kw["task_id"], kw.get("status")), "task_list": lambda **kw: TASKS.list_all(), "task_get": lambda **kw: TASKS.get(kw["task_id"]), } ``` From s07 onward, the task graph is the default for multi-step work. s03's Todo remains for quick single-session checklists. ## What Changed From s06 | Component | Before (s06) | After (s07) | |---|---|---| | Tools | 5 | 8 (`task_create/update/list/get`) | | Planning model | Flat checklist (in-memory) | Task graph with dependencies (on disk) | | Relationships | None | `blockedBy` + `blocks` edges | | Status tracking | Done or not | `pending` -> `in_progress` -> `completed` | | Persistence | Lost on compression | Survives compression and restarts | ## Try It ```sh cd learn-claude-code python agents/s07_task_system.py ``` 1. `Create 3 tasks: "Setup project", "Write code", "Write tests". Make them depend on each other in order.` 2. `List all tasks and show the dependency graph` 3. `Complete task 1 and then list tasks to see task 2 unblocked` 4. 
`Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse` ================================================ FILE: docs/en/s08-background-tasks.md ================================================ # s08: Background Tasks `s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12` > *"Run slow operations in the background; the agent keeps thinking"* -- daemon threads run commands, inject notifications on completion. > > **Harness layer**: Background execution -- the model thinks while the harness waits. ## Problem Some commands take minutes: `npm install`, `pytest`, `docker build`. With a blocking loop, the model sits idle waiting. If the user asks "install dependencies and while that runs, create the config file," the agent does them sequentially, not in parallel. ## Solution ``` Main thread Background thread +-----------------+ +-----------------+ | agent loop | | subprocess runs | | ... | | ... | | [LLM call] <---+------- | enqueue(result) | | ^drain queue | +-----------------+ +-----------------+ Timeline: Agent --[spawn A]--[spawn B]--[other work]---- | | v v [A runs] [B runs] (parallel) | | +-- results injected before next LLM call --+ ``` ## How It Works 1. BackgroundManager tracks tasks with a thread-safe notification queue. ```python class BackgroundManager: def __init__(self): self.tasks = {} self._notification_queue = [] self._lock = threading.Lock() ``` 2. `run()` starts a daemon thread and returns immediately. ```python def run(self, command: str) -> str: task_id = str(uuid.uuid4())[:8] self.tasks[task_id] = {"status": "running", "command": command} thread = threading.Thread( target=self._execute, args=(task_id, command), daemon=True) thread.start() return f"Background task {task_id} started" ``` 3. When the subprocess finishes, its result goes into the notification queue. 
```python def _execute(self, task_id, command): try: r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=300) output = (r.stdout + r.stderr).strip()[:50000] except subprocess.TimeoutExpired: output = "Error: Timeout (300s)" with self._lock: self._notification_queue.append({ "task_id": task_id, "result": output[:500]}) ``` 4. The agent loop drains notifications before each LLM call. ```python def agent_loop(messages: list): while True: notifs = BG.drain_notifications() if notifs: notif_text = "\n".join( f"[bg:{n['task_id']}] {n['result']}" for n in notifs) messages.append({"role": "user", "content": f"<background-results>\n{notif_text}\n" f"</background-results>"}) messages.append({"role": "assistant", "content": "Noted background results."}) response = client.messages.create(...) ``` The loop stays single-threaded. Only subprocess I/O is parallelized. ## What Changed From s07 | Component | Before (s07) | After (s08) | |----------------|------------------|----------------------------| | Tools | 8 | 6 (base + background_run + check)| | Execution | Blocking only | Blocking + background threads| | Notification | None | Queue drained per loop | | Concurrency | None | Daemon threads | ## Try It ```sh cd learn-claude-code python agents/s08_background_tasks.py ``` 1. `Run "sleep 5 && echo done" in the background, then create a file while it runs` 2. `Start 3 background tasks: "sleep 2", "sleep 4", "sleep 6". Check their status.` 3. `Run pytest in the background and keep working on other things` ================================================ FILE: docs/en/s09-agent-teams.md ================================================ # s09: Agent Teams `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12` > *"When the task is too big for one, delegate to teammates"* -- persistent teammates + async mailboxes. > > **Harness layer**: Team mailboxes -- multiple models, coordinated through files.
## Problem Subagents (s04) are disposable: spawn, work, return summary, die. No identity, no memory between invocations. Background tasks (s08) run shell commands but can't make LLM-guided decisions. Real teamwork needs: (1) persistent agents that outlive a single prompt, (2) identity and lifecycle management, (3) a communication channel between agents. ## Solution ``` Teammate lifecycle: spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN Communication: .team/ config.json <- team roster + statuses inbox/ alice.jsonl <- append-only, drain-on-read bob.jsonl lead.jsonl +--------+ send("alice","bob","...") +--------+ | alice | -----------------------------> | bob | | loop | bob.jsonl << {json_line} | loop | +--------+ +--------+ ^ | | BUS.read_inbox("alice") | +---- alice.jsonl -> read + drain ---------+ ``` ## How It Works 1. TeammateManager maintains config.json with the team roster. ```python class TeammateManager: def __init__(self, team_dir: Path): self.dir = team_dir self.dir.mkdir(exist_ok=True) self.config_path = self.dir / "config.json" self.config = self._load_config() self.threads = {} ``` 2. `spawn()` creates a teammate and starts its agent loop in a thread. ```python def spawn(self, name: str, role: str, prompt: str) -> str: member = {"name": name, "role": role, "status": "working"} self.config["members"].append(member) self._save_config() thread = threading.Thread( target=self._teammate_loop, args=(name, role, prompt), daemon=True) thread.start() return f"Spawned teammate '{name}' (role: {role})" ``` 3. MessageBus: append-only JSONL inboxes. `send()` appends a JSON line; `read_inbox()` reads all and drains. 
```python class MessageBus: def send(self, sender, to, content, msg_type="message", extra=None): msg = {"type": msg_type, "from": sender, "content": content, "timestamp": time.time()} if extra: msg.update(extra) with open(self.dir / f"{to}.jsonl", "a") as f: f.write(json.dumps(msg) + "\n") def read_inbox(self, name): path = self.dir / f"{name}.jsonl" if not path.exists(): return "[]" msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l] path.write_text("") # drain return json.dumps(msgs, indent=2) ``` 4. Each teammate checks its inbox before every LLM call, injecting received messages into context. ```python def _teammate_loop(self, name, role, prompt): messages = [{"role": "user", "content": prompt}] for _ in range(50): inbox = BUS.read_inbox(name) if inbox != "[]": messages.append({"role": "user", "content": f"{inbox}"}) messages.append({"role": "assistant", "content": "Noted inbox messages."}) response = client.messages.create(...) if response.stop_reason != "tool_use": break # execute tools, append results... self._find_member(name)["status"] = "idle" ``` ## What Changed From s08 | Component | Before (s08) | After (s09) | |----------------|------------------|----------------------------| | Tools | 6 | 9 (+spawn/send/read_inbox) | | Agents | Single | Lead + N teammates | | Persistence | None | config.json + JSONL inboxes| | Threads | Background cmds | Full agent loops per thread| | Lifecycle | Fire-and-forget | idle -> working -> idle | | Communication | None | message + broadcast | ## Try It ```sh cd learn-claude-code python agents/s09_agent_teams.py ``` 1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.` 2. `Broadcast "status update: phase 1 complete" to all teammates` 3. `Check the lead inbox for any messages` 4. Type `/team` to see the team roster with statuses 5. 
Type `/inbox` to manually check the lead's inbox ================================================ FILE: docs/en/s10-team-protocols.md ================================================ # s10: Team Protocols `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12` > *"Teammates need shared communication rules"* -- one request-response pattern drives all negotiation. > > **Harness layer**: Protocols -- structured handshakes between models. ## Problem In s09, teammates work and communicate but lack structured coordination: **Shutdown**: Killing a thread leaves files half-written and config.json stale. You need a handshake: the lead requests, the teammate approves (finish and exit) or rejects (keep working). **Plan approval**: When the lead says "refactor the auth module," the teammate starts immediately. For high-risk changes, the lead should review the plan first. Both share the same structure: one side sends a request with a unique ID, the other responds referencing that ID. ## Solution ``` Shutdown Protocol Plan Approval Protocol ================== ====================== Lead Teammate Teammate Lead | | | | |--shutdown_req-->| |--plan_req------>| | {req_id:"abc"} | | {req_id:"xyz"} | | | | | |<--shutdown_resp-| |<--plan_resp-----| | {req_id:"abc", | | {req_id:"xyz", | | approve:true} | | approve:true} | Shared FSM: [pending] --approve--> [approved] [pending] --reject---> [rejected] Trackers: shutdown_requests = {req_id: {target, status}} plan_requests = {req_id: {from, plan, status}} ``` ## How It Works 1. The lead initiates shutdown by generating a request_id and sending through the inbox. ```python shutdown_requests = {} def handle_shutdown_request(teammate: str) -> str: req_id = str(uuid.uuid4())[:8] shutdown_requests[req_id] = {"target": teammate, "status": "pending"} BUS.send("lead", teammate, "Please shut down gracefully.", "shutdown_request", {"request_id": req_id}) return f"Shutdown request {req_id} sent (status: pending)" ``` 2. 
The teammate receives the request and responds with approve/reject. ```python if tool_name == "shutdown_response": req_id = args["request_id"] approve = args["approve"] shutdown_requests[req_id]["status"] = "approved" if approve else "rejected" BUS.send(sender, "lead", args.get("reason", ""), "shutdown_response", {"request_id": req_id, "approve": approve}) ``` 3. Plan approval follows the identical pattern. The teammate submits a plan (generating a request_id), the lead reviews (referencing the same request_id). ```python plan_requests = {} def handle_plan_review(request_id, approve, feedback=""): req = plan_requests[request_id] req["status"] = "approved" if approve else "rejected" BUS.send("lead", req["from"], feedback, "plan_approval_response", {"request_id": request_id, "approve": approve}) ``` One FSM, two applications. The same `pending -> approved | rejected` state machine handles any request-response protocol. ## What Changed From s09 | Component | Before (s09) | After (s10) | |----------------|------------------|------------------------------| | Tools | 9 | 12 (+shutdown_req/resp +plan)| | Shutdown | Natural exit only| Request-response handshake | | Plan gating | None | Submit/review with approval | | Correlation | None | request_id per request | | FSM | None | pending -> approved/rejected | ## Try It ```sh cd learn-claude-code python agents/s10_team_protocols.py ``` 1. `Spawn alice as a coder. Then request her shutdown.` 2. `List teammates to see alice's status after shutdown approval` 3. `Spawn bob with a risky refactoring task. Review and reject his plan.` 4. `Spawn charlie, have him submit a plan, then approve it.` 5. 
Type `/team` to monitor statuses ================================================ FILE: docs/en/s11-autonomous-agents.md ================================================ # s11: Autonomous Agents `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12` > *"Teammates scan the board and claim tasks themselves"* -- no need for the lead to assign each one. > > **Harness layer**: Autonomy -- models that find work without being told. ## Problem In s09-s10, teammates only work when explicitly told to. The lead must spawn each one with a specific prompt. 10 unclaimed tasks on the board? The lead assigns each one manually. Doesn't scale. True autonomy: teammates scan the task board themselves, claim unclaimed tasks, work on them, then look for more. One subtlety: after context compression (s06), the agent might forget who it is. Identity re-injection fixes this. ## Solution ``` Teammate lifecycle with idle cycle: +-------+ | spawn | +---+---+ | v +-------+ tool_use +-------+ | WORK | <------------- | LLM | +---+---+ +-------+ | | stop_reason != tool_use (or idle tool called) v +--------+ | IDLE | poll every 5s for up to 60s +---+----+ | +---> check inbox --> message? ----------> WORK | +---> scan .tasks/ --> unclaimed? -------> claim -> WORK | +---> 60s timeout ----------------------> SHUTDOWN Identity re-injection after compression: if len(messages) <= 3: messages.insert(0, identity_block) ``` ## How It Works 1. The teammate loop has two phases: WORK and IDLE. When the LLM stops calling tools (or calls `idle`), the teammate enters IDLE. ```python def _loop(self, name, role, prompt): while True: # -- WORK PHASE -- messages = [{"role": "user", "content": prompt}] for _ in range(50): response = client.messages.create(...) if response.stop_reason != "tool_use": break # execute tools... 
if idle_requested: break # -- IDLE PHASE -- self._set_status(name, "idle") resume = self._idle_poll(name, messages) if not resume: self._set_status(name, "shutdown") return self._set_status(name, "working") ``` 2. The idle phase polls inbox and task board in a loop. ```python def _idle_poll(self, name, messages): for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12 time.sleep(POLL_INTERVAL) inbox = BUS.read_inbox(name) if inbox: messages.append({"role": "user", "content": f"{inbox}"}) return True unclaimed = scan_unclaimed_tasks() if unclaimed: claim_task(unclaimed[0]["id"], name) messages.append({"role": "user", "content": f"Task #{unclaimed[0]['id']}: " f"{unclaimed[0]['subject']}"}) return True return False # timeout -> shutdown ``` 3. Task board scanning: find pending, unowned, unblocked tasks. ```python def scan_unclaimed_tasks() -> list: unclaimed = [] for f in sorted(TASKS_DIR.glob("task_*.json")): task = json.loads(f.read_text()) if (task.get("status") == "pending" and not task.get("owner") and not task.get("blockedBy")): unclaimed.append(task) return unclaimed ``` 4. Identity re-injection: when context is too short (compression happened), insert an identity block. ```python if len(messages) <= 3: messages.insert(0, {"role": "user", "content": f"You are '{name}', role: {role}, " f"team: {team_name}. Continue your work."}) messages.insert(1, {"role": "assistant", "content": f"I am {name}. Continuing."}) ``` ## What Changed From s10 | Component | Before (s10) | After (s11) | |----------------|------------------|----------------------------| | Tools | 12 | 14 (+idle, +claim_task) | | Autonomy | Lead-directed | Self-organizing | | Idle phase | None | Poll inbox + task board | | Task claiming | Manual only | Auto-claim unclaimed tasks | | Identity | System prompt | + re-injection after compress| | Timeout | None | 60s idle -> auto shutdown | ## Try It ```sh cd learn-claude-code python agents/s11_autonomous_agents.py ``` 1. 
`Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.` 2. `Spawn a coder teammate and let it find work from the task board itself` 3. `Create tasks with dependencies. Watch teammates respect the blocked order.` 4. Type `/tasks` to see the task board with owners 5. Type `/team` to monitor who is working vs idle ================================================ FILE: docs/en/s12-worktree-task-isolation.md ================================================ # s12: Worktree + Task Isolation `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]` > *"Each works in its own directory, no interference"* -- tasks manage goals, worktrees manage directories, bound by ID. > > **Harness layer**: Directory isolation -- parallel execution lanes that never collide. ## Problem By s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Two agents refactoring different modules at the same time will collide: agent A edits `config.py`, agent B edits `config.py`, unstaged changes mix, and neither can roll back cleanly. The task board tracks *what to do* but has no opinion about *where to do it*. The fix: give each task its own git worktree directory. Tasks manage goals, worktrees manage execution context. Bind them by task ID. ## Solution ``` Control plane (.tasks/) Execution plane (.worktrees/) +------------------+ +------------------------+ | task_1.json | | auth-refactor/ | | status: in_progress <------> branch: wt/auth-refactor | worktree: "auth-refactor" | task_id: 1 | +------------------+ +------------------------+ | task_2.json | | ui-login/ | | status: pending <------> branch: wt/ui-login | worktree: "ui-login" | task_id: 2 | +------------------+ +------------------------+ | index.json (worktree registry) events.jsonl (lifecycle log) State machines: Task: pending -> in_progress -> completed Worktree: absent -> active -> removed | kept ``` ## How It Works 1. 
**Create a task.** Persist the goal first. ```python TASKS.create("Implement auth refactor") # -> .tasks/task_1.json status=pending worktree="" ``` 2. **Create a worktree and bind to the task.** Passing `task_id` auto-advances the task to `in_progress`. ```python WORKTREES.create("auth-refactor", task_id=1) # -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD # -> index.json gets new entry, task_1.json gets worktree="auth-refactor" ``` The binding writes state to both sides: ```python def bind_worktree(self, task_id, worktree): task = self._load(task_id) task["worktree"] = worktree if task["status"] == "pending": task["status"] = "in_progress" self._save(task) ``` 3. **Run commands in the worktree.** `cwd` points to the isolated directory. ```python subprocess.run(command, shell=True, cwd=worktree_path, capture_output=True, text=True, timeout=300) ``` 4. **Close out.** Two choices: - `worktree_keep(name)` -- preserve the directory for later. - `worktree_remove(name, complete_task=True)` -- remove directory, complete the bound task, emit event. One call handles teardown + completion. ```python def remove(self, name, force=False, complete_task=False): self._run_git(["worktree", "remove", wt["path"]]) if complete_task and wt.get("task_id") is not None: self.tasks.update(wt["task_id"], status="completed") self.tasks.unbind_worktree(wt["task_id"]) self.events.emit("task.completed", ...) ``` 5. **Event stream.** Every lifecycle step emits to `.worktrees/events.jsonl`: ```json { "event": "worktree.remove.after", "task": {"id": 1, "status": "completed"}, "worktree": {"name": "auth-refactor", "status": "removed"}, "ts": 1730000000 } ``` Events emitted: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`. After a crash, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. Conversation memory is volatile; file state is durable. 
## What Changed From s11 | Component | Before (s11) | After (s12) | |--------------------|----------------------------|----------------------------------------------| | Coordination | Task board (owner/status) | Task board + explicit worktree binding | | Execution scope | Shared directory | Task-scoped isolated directory | | Recoverability | Task status only | Task status + worktree index | | Teardown | Task completion | Task completion + explicit keep/remove | | Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` | ## Try It ```sh cd learn-claude-code python agents/s12_worktree_task_isolation.py ``` 1. `Create tasks for backend auth and frontend login page, then list tasks.` 2. `Create worktree "auth-refactor" for task 1, then bind task 2 to a new worktree "ui-login".` 3. `Run "git status --short" in worktree "auth-refactor".` 4. `Keep worktree "ui-login", then list worktrees and inspect events.` 5. `Remove worktree "auth-refactor" with complete_task=true, then list tasks/worktrees/events.` ================================================ FILE: docs/ja/s01-the-agent-loop.md ================================================ # s01: The Agent Loop `[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"One loop & Bash is all you need"* -- 1つのツール + 1つのループ = エージェント。 > > **Harness 層**: ループ -- モデルと現実世界を繋ぐ最初の接点。 ## 問題 言語モデルはコードについて推論できるが、現実世界に触れられない。ファイルを読めず、テストを実行できず、エラーを確認できない。ループがなければ、ツール呼び出しのたびにユーザーが手動で結果をコピーペーストする必要がある。つまりユーザー自身がループになる。 ## 解決策 ``` +--------+ +-------+ +---------+ | User | ---> | LLM | ---> | Tool | | prompt | | | | execute | +--------+ +---+---+ +----+----+ ^ | | tool_result | +----------------+ (loop until stop_reason != "tool_use") ``` 1つの終了条件がフロー全体を制御する。モデルがツール呼び出しを止めるまでループが回り続ける。 ## 仕組み 1. ユーザーのプロンプトが最初のメッセージになる。 ```python messages.append({"role": "user", "content": query}) ``` 2. 
メッセージとツール定義をLLMに送信する。 ```python response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000, ) ``` 3. アシスタントのレスポンスを追加し、`stop_reason`を確認する。ツールが呼ばれなければ終了。 ```python messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return ``` 4. 各ツール呼び出しを実行し、結果を収集してuserメッセージとして追加。ステップ2に戻る。 ```python results = [] for block in response.content: if block.type == "tool_use": output = run_bash(block.input["command"]) results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) messages.append({"role": "user", "content": results}) ``` 1つの関数にまとめると: ```python def agent_loop(query): messages = [{"role": "user", "content": query}] while True: response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] for block in response.content: if block.type == "tool_use": output = run_bash(block.input["command"]) results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) messages.append({"role": "user", "content": results}) ``` これでエージェント全体が30行未満に収まる。本コースの残りはすべてこのループの上に積み重なる -- ループ自体は変わらない。 ## 変更点 | Component | Before | After | |---------------|------------|--------------------------------| | Agent loop | (none) | `while True` + stop_reason | | Tools | (none) | `bash` (one tool) | | Messages | (none) | Accumulating list | | Control flow | (none) | `stop_reason != "tool_use"` | ## 試してみる ```sh cd learn-claude-code python agents/s01_agent_loop.py ``` 1. `Create a file called hello.py that prints "Hello, World!"` 2. `List all Python files in this directory` 3. `What is the current git branch?` 4. 
`Create a directory called test_output and write 3 files in it` ================================================ FILE: docs/ja/s02-tool-use.md ================================================ # s02: Tool Use `s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"ツールを足すなら、ハンドラーを1つ足すだけ"* -- ループは変わらない。新ツールは dispatch map に登録するだけ。 > > **Harness 層**: ツール分配 -- モデルが届く範囲を広げる。 ## 問題 `bash`だけでは、エージェントは何でもシェル経由で行う。`cat`は予測不能に切り詰め、`sed`は特殊文字で壊れ、すべてのbash呼び出しが制約のないセキュリティ面になる。`read_file`や`write_file`のような専用ツールなら、ツールレベルでパスのサンドボックス化を強制できる。 重要な点: ツールを追加してもループの変更は不要。 ## 解決策 ``` +--------+ +-------+ +------------------+ | User | ---> | LLM | ---> | Tool Dispatch | | prompt | | | | { | +--------+ +---+---+ | bash: run_bash | ^ | read: run_read | | | write: run_wr | +-----------+ edit: run_edit | tool_result | } | +------------------+ The dispatch map is a dict: {tool_name: handler_function}. One lookup replaces any if/elif chain. ``` ## 仕組み 1. 各ツールにハンドラ関数を定義する。パスのサンドボックス化でワークスペース外への脱出を防ぐ。 ```python def safe_path(p: str) -> Path: path = (WORKDIR / p).resolve() if not path.is_relative_to(WORKDIR): raise ValueError(f"Path escapes workspace: {p}") return path def run_read(path: str, limit: int = None) -> str: text = safe_path(path).read_text() lines = text.splitlines() if limit and limit < len(lines): lines = lines[:limit] return "\n".join(lines)[:50000] ``` 2. ディスパッチマップがツール名とハンドラを結びつける。 ```python TOOL_HANDLERS = { "bash": lambda **kw: run_bash(kw["command"]), "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), "write_file": lambda **kw: run_write(kw["path"], kw["content"]), "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), } ``` 3. 
ループ内で名前によりハンドラをルックアップする。ループ本体はs01から不変。 ```python for block in response.content: if block.type == "tool_use": handler = TOOL_HANDLERS.get(block.name) output = handler(**block.input) if handler \ else f"Unknown tool: {block.name}" results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) ``` ツール追加 = ハンドラ追加 + スキーマ追加。ループは決して変わらない。 ## s01からの変更点 | Component | Before (s01) | After (s02) | |----------------|--------------------|----------------------------| | Tools | 1 (bash only) | 4 (bash, read, write, edit)| | Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict | | Path safety | None | `safe_path()` sandbox | | Agent loop | Unchanged | Unchanged | ## 試してみる ```sh cd learn-claude-code python agents/s02_tool_use.py ``` 1. `Read the file requirements.txt` 2. `Create a file called greet.py with a greet(name) function` 3. `Edit greet.py to add a docstring to the function` 4. `Read greet.py to verify the edit worked` ================================================ FILE: docs/ja/s03-todo-write.md ================================================ # s03: TodoWrite `s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"計画のないエージェントは行き当たりばったり"* -- まずステップを書き出し、それから実行。 > > **Harness 層**: 計画 -- 航路を描かずにモデルを軌道に乗せる。 ## 問題 マルチステップのタスクで、モデルは途中で迷子になる。作業を繰り返したり、ステップを飛ばしたり、脱線したりする。長い会話になるほど悪化する -- ツール結果がコンテキストを埋めるにつれ、システムプロンプトの影響力が薄れる。10ステップのリファクタリングでステップ1-3を完了した後、残りを忘れて即興を始めてしまう。 ## 解決策 ``` +--------+ +-------+ +---------+ | User | ---> | LLM | ---> | Tools | | prompt | | | | + todo | +--------+ +---+---+ +----+----+ ^ | | tool_result | +----------------+ | +-----------+-----------+ | TodoManager state | | [ ] task A | | [>] task B <- doing | | [x] task C | +-----------------------+ | if rounds_since_todo >= 3: inject into tool_result ``` ## 仕組み 1. 
TodoManagerはアイテムのリストをステータス付きで保持する。`in_progress`にできるのは同時に1つだけ。 ```python class TodoManager: def update(self, items: list) -> str: validated, in_progress_count = [], 0 for item in items: status = item.get("status", "pending") if status == "in_progress": in_progress_count += 1 validated.append({"id": item["id"], "text": item["text"], "status": status}) if in_progress_count > 1: raise ValueError("Only one task can be in_progress") self.items = validated return self.render() ``` 2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。 ```python TOOL_HANDLERS = { # ...base tools... "todo": lambda **kw: TODO.update(kw["items"]), } ``` 3. nagリマインダーが、モデルが3ラウンド以上`todo`を呼ばなかった場合にナッジを注入する。 ```python if rounds_since_todo >= 3 and messages: last = messages[-1] if last["role"] == "user" and isinstance(last.get("content"), list): last["content"].insert(0, { "type": "text", "text": "<reminder>Update your todos.</reminder>", }) ``` 「一度にin_progressは1つだけ」の制約が逐次的な集中を強制し、nagリマインダーが説明責任を生む。 ## s02からの変更点 | Component | Before (s02) | After (s03) | |----------------|------------------|----------------------------| | Tools | 4 | 5 (+todo) | | Planning | None | TodoManager with statuses | | Nag injection | None | `<reminder>` after 3 rounds| | Agent loop | Simple dispatch | + rounds_since_todo counter| ## 試してみる ```sh cd learn-claude-code python agents/s03_todo_write.py ``` 1. `Refactor the file hello.py: add type hints, docstrings, and a main guard` 2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py` 3. 
`Review all Python files and fix any style issues` ================================================ FILE: docs/ja/s04-subagent.md ================================================ # s04: Subagents `s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"大きなタスクを分割し、各サブタスクにクリーンなコンテキストを"* -- サブエージェントは独立した messages[] を使い、メイン会話を汚さない。 > > **Harness 層**: コンテキスト隔離 -- モデルの思考の明晰さを守る。 ## 問題 エージェントが作業するにつれ、messages配列は膨張し続ける。すべてのファイル読み取り、すべてのbash出力がコンテキストに永久に残る。「このプロジェクトはどのテストフレームワークを使っているか」という質問は5つのファイルを読む必要があるかもしれないが、親に必要なのは「pytest」という答えだけだ。 ## 解決策 ``` Parent agent Subagent +------------------+ +------------------+ | messages=[...] | | messages=[] | <-- fresh | | dispatch | | | tool: task | ----------> | while tool_use: | | prompt="..." | | call tools | | | summary | append results | | result = "..." | <---------- | return last text | +------------------+ +------------------+ Parent context stays clean. Subagent context is discarded. ``` ## 仕組み 1. 親に`task`ツールを追加する。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。 ```python PARENT_TOOLS = CHILD_TOOLS + [ {"name": "task", "description": "Spawn a subagent with fresh context.", "input_schema": { "type": "object", "properties": {"prompt": {"type": "string"}}, "required": ["prompt"], }}, ] ``` 2. 
サブエージェントは`messages=[]`で開始し、自身のループを実行する。最終テキストだけが親に返る。 ```python def run_subagent(prompt: str) -> str: sub_messages = [{"role": "user", "content": prompt}] for _ in range(30): # safety limit response = client.messages.create( model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages, tools=CHILD_TOOLS, max_tokens=8000, ) sub_messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": break results = [] for block in response.content: if block.type == "tool_use": handler = TOOL_HANDLERS.get(block.name) output = handler(**block.input) results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)[:50000]}) sub_messages.append({"role": "user", "content": results}) return "".join( b.text for b in response.content if hasattr(b, "text") ) or "(no summary)" ``` 子のメッセージ履歴全体(30回以上のツール呼び出し)は破棄される。親は1段落の要約を通常の`tool_result`として受け取る。 ## s03からの変更点 | Component | Before (s03) | After (s04) | |----------------|------------------|---------------------------| | Tools | 5 | 5 (base) + task (parent) | | Context | Single shared | Parent + child isolation | | Subagent | None | `run_subagent()` function | | Return value | N/A | Summary text only | ## 試してみる ```sh cd learn-claude-code python agents/s04_subagent.py ``` 1. `Use a subtask to find what testing framework this project uses` 2. `Delegate: read all .py files and summarize what each one does` 3. 
`Use a task to create a new module, then verify it from here` ================================================ FILE: docs/ja/s05-skill-loading.md ================================================ # s05: Skills `s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"必要な知識を、必要な時に読み込む"* -- system prompt ではなく tool_result で注入。 > > **Harness 層**: オンデマンド知識 -- モデルが求めた時だけ渡すドメイン専門性。 ## 問題 エージェントにドメイン固有のワークフローを遵守させたい: gitの規約、テストパターン、コードレビューチェックリスト。すべてをシステムプロンプトに入れると、使われないスキルにトークンを浪費する。10スキル x 2000トークン = 20,000トークン、ほとんどが任意のタスクに無関係だ。 ## 解決策 ``` System prompt (Layer 1 -- always present): +--------------------------------------+ | You are a coding agent. | | Skills available: | | - git: Git workflow helpers | ~100 tokens/skill | - test: Testing best practices | +--------------------------------------+ When model calls load_skill("git"): +--------------------------------------+ | tool_result (Layer 2 -- on demand): | | | | Full git workflow instructions... | ~2000 tokens | Step 1: ... | | | +--------------------------------------+ ``` 第1層: スキル*名*をシステムプロンプトに(低コスト)。第2層: スキル*本体*をtool_resultに(オンデマンド)。 ## 仕組み 1. 各スキルは `SKILL.md` ファイルを含むディレクトリとして配置される。 ``` skills/ pdf/ SKILL.md # ---\n name: pdf\n description: Process PDF files\n ---\n ... code-review/ SKILL.md # ---\n name: code-review\n description: Review code\n ---\n ... ``` 2. 
SkillLoaderが `SKILL.md` を再帰的に探索し、ディレクトリ名をスキル識別子として使用する。 ```python class SkillLoader: def __init__(self, skills_dir: Path): self.skills = {} for f in sorted(skills_dir.rglob("SKILL.md")): text = f.read_text() meta, body = self._parse_frontmatter(text) name = meta.get("name", f.parent.name) self.skills[name] = {"meta": meta, "body": body} def get_descriptions(self) -> str: lines = [] for name, skill in self.skills.items(): desc = skill["meta"].get("description", "") lines.append(f" - {name}: {desc}") return "\n".join(lines) def get_content(self, name: str) -> str: skill = self.skills.get(name) if not skill: return f"Error: Unknown skill '{name}'." return f"\n{skill['body']}\n" ``` 3. 第1層はシステムプロンプトに配置。第2層は通常のツールハンドラ。 ```python SYSTEM = f"""You are a coding agent at {WORKDIR}. Skills available: {SKILL_LOADER.get_descriptions()}""" TOOL_HANDLERS = { # ...base tools... "load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]), } ``` モデルはどのスキルが存在するかを知り(低コスト)、関連する時にだけ読み込む(高コスト)。 ## s04からの変更点 | Component | Before (s04) | After (s05) | |----------------|------------------|----------------------------| | Tools | 5 (base + task) | 5 (base + load_skill) | | System prompt | Static string | + skill descriptions | | Knowledge | None | skills/\*/SKILL.md files | | Injection | None | Two-layer (system + result)| ## 試してみる ```sh cd learn-claude-code python agents/s05_skill_loading.py ``` 1. `What skills are available?` 2. `Load the agent-builder skill and follow its instructions` 3. `I need to do a code review -- load the relevant skill first` 4. 
`Build an MCP server using the mcp-builder skill` ================================================ FILE: docs/ja/s06-context-compact.md ================================================ # s06: Context Compact `s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12` > *"コンテキストはいつか溢れる、空ける手段が要る"* -- 3層圧縮で無限セッションを実現。 > > **Harness 層**: 圧縮 -- クリーンな記憶、無限のセッション。 ## 問題 コンテキストウィンドウは有限だ。1000行のファイルに対する`read_file`1回で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン超。圧縮なしでは、エージェントは大規模コードベースで作業できない。 ## 解決策 積極性を段階的に上げる3層構成: ``` Every turn: +------------------+ | Tool call result | +------------------+ | v [Layer 1: micro_compact] (silent, every turn) Replace tool_result > 3 turns old with "[Previous: used {tool_name}]" | v [Check: tokens > 50000?] | | no yes | | v v continue [Layer 2: auto_compact] Save transcript to .transcripts/ LLM summarizes conversation. Replace all messages with [summary]. | v [Layer 3: compact tool] Model calls compact explicitly. Same summarization as auto_compact. ``` ## 仕組み 1. **第1層 -- micro_compact**: 各LLM呼び出しの前に、古いツール結果をプレースホルダーに置換する。 ```python def micro_compact(messages: list) -> list: tool_results = [] for i, msg in enumerate(messages): if msg["role"] == "user" and isinstance(msg.get("content"), list): for j, part in enumerate(msg["content"]): if isinstance(part, dict) and part.get("type") == "tool_result": tool_results.append((i, j, part)) if len(tool_results) <= KEEP_RECENT: return messages for _, _, part in tool_results[:-KEEP_RECENT]: if len(part.get("content", "")) > 100: part["content"] = f"[Previous: used {tool_name}]" return messages ``` 2. 
**第2層 -- auto_compact**: トークンが閾値を超えたら、完全なトランスクリプトをディスクに保存し、LLMに要約を依頼する。 ```python def auto_compact(messages: list) -> list: # Save transcript for recovery transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl" with open(transcript_path, "w") as f: for msg in messages: f.write(json.dumps(msg, default=str) + "\n") # LLM summarizes response = client.messages.create( model=MODEL, messages=[{"role": "user", "content": "Summarize this conversation for continuity..." + json.dumps(messages, default=str)[:80000]}], max_tokens=2000, ) return [ {"role": "user", "content": f"[Compressed]\n\n{response.content[0].text}"}, {"role": "assistant", "content": "Understood. Continuing."}, ] ``` 3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。 4. ループが3層すべてを統合する: ```python def agent_loop(messages: list): while True: micro_compact(messages) # Layer 1 if estimate_tokens(messages) > THRESHOLD: messages[:] = auto_compact(messages) # Layer 2 response = client.messages.create(...) # ... tool execution ... if manual_compact: messages[:] = auto_compact(messages) # Layer 3 ``` トランスクリプトがディスク上に完全な履歴を保持する。何も真に失われず、アクティブなコンテキストの外に移動されるだけ。 ## s05からの変更点 | Component | Before (s05) | After (s06) | |----------------|------------------|----------------------------| | Tools | 5 | 5 (base + compact) | | Context mgmt | None | Three-layer compression | | Micro-compact | None | Old results -> placeholders| | Auto-compact | None | Token threshold trigger | | Transcripts | None | Saved to .transcripts/ | ## 試してみる ```sh cd learn-claude-code python agents/s06_context_compact.py ``` 1. `Read every Python file in the agents/ directory one by one` (micro-compactが古い結果を置換するのを観察する) 2. `Keep reading files until compression triggers automatically` 3. 
`Use the compact tool to manually compress the conversation` ================================================ FILE: docs/ja/s07-task-system.md ================================================ # s07: Task System `s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12` > *"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する"* -- ファイルベースのタスクグラフ、マルチエージェント協調の基盤。 > > **Harness 層**: 永続タスク -- どの会話よりも長く生きる目標。 ## 問題 s03のTodoManagerはメモリ上のフラットなチェックリストに過ぎない: 順序なし、依存関係なし、ステータスは完了か未完了のみ。実際の目標には構造がある -- タスクBはタスクAに依存し、タスクCとDは並行実行でき、タスクEはCとDの両方を待つ。 明示的な関係がなければ、エージェントは何が実行可能で、何がブロックされ、何が同時に走れるかを判断できない。しかもリストはメモリ上にしかないため、コンテキスト圧縮(s06)で消える。 ## 解決策 フラットなチェックリストをディスクに永続化する**タスクグラフ**に昇格させる。各タスクは1つのJSONファイルで、ステータス・前方依存(`blockedBy`)・後方依存(`blocks`)を持つ。タスクグラフは常に3つの問いに答える: - **何が実行可能か?** -- `pending`ステータスで`blockedBy`が空のタスク。 - **何がブロックされているか?** -- 未完了の依存を待つタスク。 - **何が完了したか?** -- `completed`のタスク。完了時に後続タスクを自動的にアンブロックする。 ``` .tasks/ task_1.json {"id":1, "status":"completed"} task_2.json {"id":2, "blockedBy":[1], "status":"pending"} task_3.json {"id":3, "blockedBy":[1], "status":"pending"} task_4.json {"id":4, "blockedBy":[2,3], "status":"pending"} タスクグラフ (DAG): +----------+ +--> | task 2 | --+ | | pending | | +----------+ +----------+ +--> +----------+ | task 1 | | task 4 | | completed| --> +----------+ +--> | blocked | +----------+ | task 3 | --+ +----------+ | pending | +----------+ 順序: task 1 は 2 と 3 より先に完了する必要がある 並行: task 2 と 3 は同時に実行できる 依存: task 4 は 2 と 3 の両方を待つ ステータス: pending -> in_progress -> completed ``` このタスクグラフは s07 以降の全メカニズムの協調バックボーンとなる: バックグラウンド実行(s08)、マルチエージェントチーム(s09+)、worktree分離(s12)はすべてこの同じ構造を読み書きする。 ## 仕組み 1. 
**TaskManager**: タスクごとに1つのJSONファイル、依存グラフ付きCRUD。 ```python class TaskManager: def __init__(self, tasks_dir: Path): self.dir = tasks_dir self.dir.mkdir(exist_ok=True) self._next_id = self._max_id() + 1 def create(self, subject, description=""): task = {"id": self._next_id, "subject": subject, "status": "pending", "blockedBy": [], "blocks": [], "owner": ""} self._save(task) self._next_id += 1 return json.dumps(task, indent=2) ``` 2. **依存解除**: タスク完了時に、他タスクの`blockedBy`リストから完了IDを除去し、後続タスクをアンブロックする。 ```python def _clear_dependency(self, completed_id): for f in self.dir.glob("task_*.json"): task = json.loads(f.read_text()) if completed_id in task.get("blockedBy", []): task["blockedBy"].remove(completed_id) self._save(task) ``` 3. **ステータス遷移 + 依存配線**: `update`がステータス変更と依存エッジを担う。 ```python def update(self, task_id, status=None, add_blocked_by=None, add_blocks=None): task = self._load(task_id) if status: task["status"] = status if status == "completed": self._clear_dependency(task_id) self._save(task) ``` 4. 4つのタスクツールをディスパッチマップに追加する。 ```python TOOL_HANDLERS = { # ...base tools... "task_create": lambda **kw: TASKS.create(kw["subject"]), "task_update": lambda **kw: TASKS.update(kw["task_id"], kw.get("status")), "task_list": lambda **kw: TASKS.list_all(), "task_get": lambda **kw: TASKS.get(kw["task_id"]), } ``` s07以降、タスクグラフがマルチステップ作業のデフォルト。s03のTodoは軽量な単一セッション用チェックリストとして残る。 ## s06からの変更点 | コンポーネント | Before (s06) | After (s07) | |---|---|---| | Tools | 5 | 8 (`task_create/update/list/get`) | | 計画モデル | フラットチェックリスト (メモリ) | 依存関係付きタスクグラフ (ディスク) | | 関係 | なし | `blockedBy` + `blocks` エッジ | | ステータス追跡 | 完了か未完了 | `pending` -> `in_progress` -> `completed` | | 永続性 | 圧縮で消失 | 圧縮・再起動後も存続 | ## 試してみる ```sh cd learn-claude-code python agents/s07_task_system.py ``` 1. `Create 3 tasks: "Setup project", "Write code", "Write tests". Make them depend on each other in order.` 2. `List all tasks and show the dependency graph` 3. `Complete task 1 and then list tasks to see task 2 unblocked` 4. 
`Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse` ================================================ FILE: docs/ja/s08-background-tasks.md ================================================ # s08: Background Tasks `s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12` > *"遅い操作はバックグラウンドへ、エージェントは次を考え続ける"* -- デーモンスレッドがコマンド実行、完了後に通知を注入。 > > **Harness 層**: バックグラウンド実行 -- モデルが考え続ける間、Harness が待つ。 ## 問題 一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングループでは、モデルはサブプロセスの完了を待って座っている。ユーザーが「依存関係をインストールして、その間にconfigファイルを作って」と言っても、エージェントは並列ではなく逐次的に処理する。 ## 解決策 ``` Main thread Background thread +-----------------+ +-----------------+ | agent loop | | subprocess runs | | ... | | ... | | [LLM call] <---+------- | enqueue(result) | | ^drain queue | +-----------------+ +-----------------+ Timeline: Agent --[spawn A]--[spawn B]--[other work]---- | | v v [A runs] [B runs] (parallel) | | +-- results injected before next LLM call --+ ``` ## 仕組み 1. BackgroundManagerがスレッドセーフな通知キューでタスクを追跡する。 ```python class BackgroundManager: def __init__(self): self.tasks = {} self._notification_queue = [] self._lock = threading.Lock() ``` 2. `run()`がデーモンスレッドを開始し、即座にリターンする。 ```python def run(self, command: str) -> str: task_id = str(uuid.uuid4())[:8] self.tasks[task_id] = {"status": "running", "command": command} thread = threading.Thread( target=self._execute, args=(task_id, command), daemon=True) thread.start() return f"Background task {task_id} started" ``` 3. サブプロセス完了時に、結果を通知キューへ。 ```python def _execute(self, task_id, command): try: r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=300) output = (r.stdout + r.stderr).strip()[:50000] except subprocess.TimeoutExpired: output = "Error: Timeout (300s)" with self._lock: self._notification_queue.append({ "task_id": task_id, "result": output[:500]}) ``` 4. 
エージェントループが各LLM呼び出しの前に通知をドレインする。 ```python def agent_loop(messages: list): while True: notifs = BG.drain_notifications() if notifs: notif_text = "\n".join( f"[bg:{n['task_id']}] {n['result']}" for n in notifs) messages.append({"role": "user", "content": f"<background-results>\n{notif_text}\n" f"</background-results>"}) messages.append({"role": "assistant", "content": "Noted background results."}) response = client.messages.create(...) ``` ループはシングルスレッドのまま。サブプロセスI/Oだけが並列化される。 ## s07からの変更点 | Component | Before (s07) | After (s08) | |----------------|------------------|----------------------------| | Tools | 8 | 6 (base + background_run + check)| | Execution | Blocking only | Blocking + background threads| | Notification | None | Queue drained per loop | | Concurrency | None | Daemon threads | ## 試してみる ```sh cd learn-claude-code python agents/s08_background_tasks.py ``` 1. `Run "sleep 5 && echo done" in the background, then create a file while it runs` 2. `Start 3 background tasks: "sleep 2", "sleep 4", "sleep 6". Check their status.` 3. `Run pytest in the background and keep working on other things` ================================================ FILE: docs/ja/s09-agent-teams.md ================================================ # s09: Agent Teams `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12` > *"一人で終わらないなら、チームメイトに任せる"* -- 永続チームメイト + 非同期メールボックス。 > > **Harness 層**: チームメールボックス -- 複数モデルをファイルで協調。 ## 問題 サブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定はできない。 本物のチームワークには: (1)単一プロンプトを超えて存続する永続エージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネルが必要だ。 ## 解決策 ``` Teammate lifecycle: spawn -> WORKING -> IDLE -> WORKING -> ... 
-> SHUTDOWN Communication: .team/ config.json <- team roster + statuses inbox/ alice.jsonl <- append-only, drain-on-read bob.jsonl lead.jsonl +--------+ send("alice","bob","...") +--------+ | alice | -----------------------------> | bob | | loop | bob.jsonl << {json_line} | loop | +--------+ +--------+ ^ | | BUS.read_inbox("alice") | +---- alice.jsonl -> read + drain ---------+ ``` ## 仕組み 1. TeammateManagerがconfig.jsonでチーム名簿を管理する。 ```python class TeammateManager: def __init__(self, team_dir: Path): self.dir = team_dir self.dir.mkdir(exist_ok=True) self.config_path = self.dir / "config.json" self.config = self._load_config() self.threads = {} ``` 2. `spawn()`がチームメイトを作成し、そのエージェントループをスレッドで開始する。 ```python def spawn(self, name: str, role: str, prompt: str) -> str: member = {"name": name, "role": role, "status": "working"} self.config["members"].append(member) self._save_config() thread = threading.Thread( target=self._teammate_loop, args=(name, role, prompt), daemon=True) thread.start() return f"Spawned teammate '{name}' (role: {role})" ``` 3. MessageBus: 追記専用のJSONLインボックス。`send()`がJSON行を追記し、`read_inbox()`がすべて読み取ってドレインする。 ```python class MessageBus: def send(self, sender, to, content, msg_type="message", extra=None): msg = {"type": msg_type, "from": sender, "content": content, "timestamp": time.time()} if extra: msg.update(extra) with open(self.dir / f"{to}.jsonl", "a") as f: f.write(json.dumps(msg) + "\n") def read_inbox(self, name): path = self.dir / f"{name}.jsonl" if not path.exists(): return "[]" msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l] path.write_text("") # drain return json.dumps(msgs, indent=2) ``` 4. 
各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージをコンテキストに注入する。 ```python def _teammate_loop(self, name, role, prompt): messages = [{"role": "user", "content": prompt}] for _ in range(50): inbox = BUS.read_inbox(name) if inbox != "[]": messages.append({"role": "user", "content": f"{inbox}"}) messages.append({"role": "assistant", "content": "Noted inbox messages."}) response = client.messages.create(...) if response.stop_reason != "tool_use": break # execute tools, append results... self._find_member(name)["status"] = "idle" ``` ## s08からの変更点 | Component | Before (s08) | After (s09) | |----------------|------------------|----------------------------| | Tools | 6 | 9 (+spawn/send/read_inbox) | | Agents | Single | Lead + N teammates | | Persistence | None | config.json + JSONL inboxes| | Threads | Background cmds | Full agent loops per thread| | Lifecycle | Fire-and-forget | idle -> working -> idle | | Communication | None | message + broadcast | ## 試してみる ```sh cd learn-claude-code python agents/s09_agent_teams.py ``` 1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.` 2. `Broadcast "status update: phase 1 complete" to all teammates` 3. `Check the lead inbox for any messages` 4. `/team`と入力してステータス付きのチーム名簿を確認する 5. 
`/inbox`と入力してリーダーのインボックスを手動確認する ================================================ FILE: docs/ja/s10-team-protocols.md ================================================ # s10: Team Protocols `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12` > *"チームメイト間には統一の通信ルールが必要"* -- 1つの request-response パターンが全交渉を駆動。 > > **Harness 層**: プロトコル -- モデル間の構造化されたハンドシェイク。 ## 問題 s09ではチームメイトが作業し通信するが、構造化された協調がない: **シャットダウン**: スレッドを強制終了するとファイルが中途半端に書かれ、config.jsonが不正な状態になる。ハンドシェイクが必要 -- リーダーが要求し、チームメイトが承認(完了して退出)か拒否(作業継続)する。 **プラン承認**: リーダーが「認証モジュールをリファクタリングして」と言うと、チームメイトは即座に開始する。リスクの高い変更では、実行前にリーダーが計画をレビューすべきだ。 両方とも同じ構造: 一方がユニークIDを持つリクエストを送り、他方がそのIDで応答する。 ## 解決策 ``` Shutdown Protocol Plan Approval Protocol ================== ====================== Lead Teammate Teammate Lead | | | | |--shutdown_req-->| |--plan_req------>| | {req_id:"abc"} | | {req_id:"xyz"} | | | | | |<--shutdown_resp-| |<--plan_resp-----| | {req_id:"abc", | | {req_id:"xyz", | | approve:true} | | approve:true} | Shared FSM: [pending] --approve--> [approved] [pending] --reject---> [rejected] Trackers: shutdown_requests = {req_id: {target, status}} plan_requests = {req_id: {from, plan, status}} ``` ## 仕組み 1. リーダーがrequest_idを生成し、インボックス経由でシャットダウンを開始する。 ```python shutdown_requests = {} def handle_shutdown_request(teammate: str) -> str: req_id = str(uuid.uuid4())[:8] shutdown_requests[req_id] = {"target": teammate, "status": "pending"} BUS.send("lead", teammate, "Please shut down gracefully.", "shutdown_request", {"request_id": req_id}) return f"Shutdown request {req_id} sent (status: pending)" ``` 2. チームメイトがリクエストを受信し、承認または拒否で応答する。 ```python if tool_name == "shutdown_response": req_id = args["request_id"] approve = args["approve"] shutdown_requests[req_id]["status"] = "approved" if approve else "rejected" BUS.send(sender, "lead", args.get("reason", ""), "shutdown_response", {"request_id": req_id, "approve": approve}) ``` 3. 
プラン承認も同一パターン。チームメイトがプランを提出(request_idを生成)、リーダーがレビュー(同じrequest_idを参照)。 ```python plan_requests = {} def handle_plan_review(request_id, approve, feedback=""): req = plan_requests[request_id] req["status"] = "approved" if approve else "rejected" BUS.send("lead", req["from"], feedback, "plan_approval_response", {"request_id": request_id, "approve": approve}) ``` 1つのFSM、2つの応用。同じ`pending -> approved | rejected`状態機械が、あらゆるリクエスト-レスポンスプロトコルに適用できる。 ## s09からの変更点 | Component | Before (s09) | After (s10) | |----------------|------------------|------------------------------| | Tools | 9 | 12 (+shutdown_req/resp +plan)| | Shutdown | Natural exit only| Request-response handshake | | Plan gating | None | Submit/review with approval | | Correlation | None | request_id per request | | FSM | None | pending -> approved/rejected | ## 試してみる ```sh cd learn-claude-code python agents/s10_team_protocols.py ``` 1. `Spawn alice as a coder. Then request her shutdown.` 2. `List teammates to see alice's status after shutdown approval` 3. `Spawn bob with a risky refactoring task. Review and reject his plan.` 4. `Spawn charlie, have him submit a plan, then approve it.` 5. 
`/team`と入力してステータスを監視する ================================================ FILE: docs/ja/s11-autonomous-agents.md ================================================ # s11: Autonomous Agents `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12` > *"チームメイトが自らボードを見て、仕事を取る"* -- リーダーが逐一割り振る必要はない。 > > **Harness 層**: 自律 -- 指示なしで仕事を見つけるモデル。 ## 問題 s09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てる。これはスケールしない。 真の自律性とは、チームメイトが自分で作業を見つけること: タスクボードをスキャンし、未確保のタスクを確保し、作業し、完了したら次を探す。 もう1つの問題: コンテキスト圧縮(s06)後にエージェントが自分の正体を忘れる可能性がある。アイデンティティ再注入がこれを解決する。 ## 解決策 ``` Teammate lifecycle with idle cycle: +-------+ | spawn | +---+---+ | v +-------+ tool_use +-------+ | WORK | <------------- | LLM | +---+---+ +-------+ | | stop_reason != tool_use (or idle tool called) v +--------+ | IDLE | poll every 5s for up to 60s +---+----+ | +---> check inbox --> message? ----------> WORK | +---> scan .tasks/ --> unclaimed? -------> claim -> WORK | +---> 60s timeout ----------------------> SHUTDOWN Identity re-injection after compression: if len(messages) <= 3: messages.insert(0, identity_block) ``` ## 仕組み 1. チームメイトのループはWORKとIDLEの2フェーズ。LLMがツール呼び出しを止めた時(または`idle`ツールを呼んだ時)、IDLEフェーズに入る。 ```python def _loop(self, name, role, prompt): while True: # -- WORK PHASE -- messages = [{"role": "user", "content": prompt}] for _ in range(50): response = client.messages.create(...) if response.stop_reason != "tool_use": break # execute tools... if idle_requested: break # -- IDLE PHASE -- self._set_status(name, "idle") resume = self._idle_poll(name, messages) if not resume: self._set_status(name, "shutdown") return self._set_status(name, "working") ``` 2. 
IDLEフェーズがインボックスとタスクボードをポーリングする。 ```python def _idle_poll(self, name, messages): for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12 time.sleep(POLL_INTERVAL) inbox = BUS.read_inbox(name) if inbox: messages.append({"role": "user", "content": f"{inbox}"}) return True unclaimed = scan_unclaimed_tasks() if unclaimed: claim_task(unclaimed[0]["id"], name) messages.append({"role": "user", "content": f"Task #{unclaimed[0]['id']}: " f"{unclaimed[0]['subject']}"}) return True return False # timeout -> shutdown ``` 3. タスクボードスキャン: pendingかつ未割り当てかつブロックされていないタスクを探す。 ```python def scan_unclaimed_tasks() -> list: unclaimed = [] for f in sorted(TASKS_DIR.glob("task_*.json")): task = json.loads(f.read_text()) if (task.get("status") == "pending" and not task.get("owner") and not task.get("blockedBy")): unclaimed.append(task) return unclaimed ``` 4. アイデンティティ再注入: コンテキストが短すぎる(圧縮が起きた)場合にアイデンティティブロックを挿入する。 ```python if len(messages) <= 3: messages.insert(0, {"role": "user", "content": f"You are '{name}', role: {role}, " f"team: {team_name}. Continue your work."}) messages.insert(1, {"role": "assistant", "content": f"I am {name}. Continuing."}) ``` ## s10からの変更点 | Component | Before (s10) | After (s11) | |----------------|------------------|----------------------------| | Tools | 12 | 14 (+idle, +claim_task) | | Autonomy | Lead-directed | Self-organizing | | Idle phase | None | Poll inbox + task board | | Task claiming | Manual only | Auto-claim unclaimed tasks | | Identity | System prompt | + re-injection after compress| | Timeout | None | 60s idle -> auto shutdown | ## 試してみる ```sh cd learn-claude-code python agents/s11_autonomous_agents.py ``` 1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.` 2. `Spawn a coder teammate and let it find work from the task board itself` 3. `Create tasks with dependencies. Watch teammates respect the blocked order.` 4. `/tasks`と入力してオーナー付きのタスクボードを確認する 5. 
`/team`と入力して誰が作業中でアイドルかを監視する ================================================ FILE: docs/ja/s12-worktree-task-isolation.md ================================================ # s12: Worktree + Task Isolation `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]` > *"各自のディレクトリで作業し、互いに干渉しない"* -- タスクは目標を管理、worktree はディレクトリを管理、IDで紐付け。 > > **Harness 層**: ディレクトリ隔離 -- 決して衝突しない並列実行レーン。 ## 問題 s11までにエージェントはタスクを自律的に確保して完了できるようになった。しかし全タスクが1つの共有ディレクトリで走る。2つのエージェントが同時に異なるモジュールをリファクタリングすると衝突する: 片方が`config.py`を編集し、もう片方も`config.py`を編集し、未コミットの変更が混ざり合い、どちらもクリーンにロールバックできない。 タスクボードは*何をやるか*を追跡するが、*どこでやるか*には関知しない。解決策: 各タスクに専用のgit worktreeディレクトリを与える。タスクが目標を管理し、worktreeが実行コンテキストを管理する。タスクIDで紐付ける。 ## 解決策 ``` Control plane (.tasks/) Execution plane (.worktrees/) +------------------+ +------------------------+ | task_1.json | | auth-refactor/ | | status: in_progress <------> branch: wt/auth-refactor | worktree: "auth-refactor" | task_id: 1 | +------------------+ +------------------------+ | task_2.json | | ui-login/ | | status: pending <------> branch: wt/ui-login | worktree: "ui-login" | task_id: 2 | +------------------+ +------------------------+ | index.json (worktree registry) events.jsonl (lifecycle log) State machines: Task: pending -> in_progress -> completed Worktree: absent -> active -> removed | kept ``` ## 仕組み 1. **タスクを作成する。** まず目標を永続化する。 ```python TASKS.create("Implement auth refactor") # -> .tasks/task_1.json status=pending worktree="" ``` 2. **worktreeを作成してタスクに紐付ける。** `task_id`を渡すと、タスクが自動的に`in_progress`に遷移する。 ```python WORKTREES.create("auth-refactor", task_id=1) # -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD # -> index.json gets new entry, task_1.json gets worktree="auth-refactor" ``` 紐付けは両側に状態を書き込む: ```python def bind_worktree(self, task_id, worktree): task = self._load(task_id) task["worktree"] = worktree if task["status"] == "pending": task["status"] = "in_progress" self._save(task) ``` 3. 
**worktree内でコマンドを実行する。** `cwd`が分離ディレクトリを指す。 ```python subprocess.run(command, shell=True, cwd=worktree_path, capture_output=True, text=True, timeout=300) ``` 4. **終了処理。** 2つの選択肢: - `worktree_keep(name)` -- ディレクトリを保持する。 - `worktree_remove(name, complete_task=True)` -- ディレクトリを削除し、紐付けられたタスクを完了し、イベントを発行する。1回の呼び出しで後片付けと完了を処理する。 ```python def remove(self, name, force=False, complete_task=False): self._run_git(["worktree", "remove", wt["path"]]) if complete_task and wt.get("task_id") is not None: self.tasks.update(wt["task_id"], status="completed") self.tasks.unbind_worktree(wt["task_id"]) self.events.emit("task.completed", ...) ``` 5. **イベントストリーム。** ライフサイクルの各ステップが`.worktrees/events.jsonl`に記録される: ```json { "event": "worktree.remove.after", "task": {"id": 1, "status": "completed"}, "worktree": {"name": "auth-refactor", "status": "removed"}, "ts": 1730000000 } ``` 発行されるイベント: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。 クラッシュ後も`.tasks/` + `.worktrees/index.json`から状態を再構築できる。会話メモリは揮発性だが、ファイル状態は永続的だ。 ## s11からの変更点 | Component | Before (s11) | After (s12) | |--------------------|----------------------------|----------------------------------------------| | Coordination | Task board (owner/status) | Task board + explicit worktree binding | | Execution scope | Shared directory | Task-scoped isolated directory | | Recoverability | Task status only | Task status + worktree index | | Teardown | Task completion | Task completion + explicit keep/remove | | Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` | ## 試してみる ```sh cd learn-claude-code python agents/s12_worktree_task_isolation.py ``` 1. `Create tasks for backend auth and frontend login page, then list tasks.` 2. `Create worktree "auth-refactor" for task 1, then bind task 2 to a new worktree "ui-login".` 3. `Run "git status --short" in worktree "auth-refactor".` 4. 
`Keep worktree "ui-login", then list worktrees and inspect events.` 5. `Remove worktree "auth-refactor" with complete_task=true, then list tasks/worktrees/events.` ================================================ FILE: docs/zh/s01-the-agent-loop.md ================================================ # s01: The Agent Loop (智能体循环) `[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"One loop & Bash is all you need"* -- 一个工具 + 一个循环 = 一个智能体。 > > **Harness 层**: 循环 -- 模型与真实世界的第一道连接。 ## 问题 语言模型能推理代码, 但碰不到真实世界 -- 不能读文件、跑测试、看报错。没有循环, 每次工具调用你都得手动把结果粘回去。你自己就是那个循环。 ## 解决方案 ``` +--------+ +-------+ +---------+ | User | ---> | LLM | ---> | Tool | | prompt | | | | execute | +--------+ +---+---+ +----+----+ ^ | | tool_result | +----------------+ (loop until stop_reason != "tool_use") ``` 一个退出条件控制整个流程。循环持续运行, 直到模型不再调用工具。 ## 工作原理 1. 用户 prompt 作为第一条消息。 ```python messages.append({"role": "user", "content": query}) ``` 2. 将消息和工具定义一起发给 LLM。 ```python response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000, ) ``` 3. 追加助手响应。检查 `stop_reason` -- 如果模型没有调用工具, 结束。 ```python messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return ``` 4. 
执行每个工具调用, 收集结果, 作为 user 消息追加。回到第 2 步。 ```python results = [] for block in response.content: if block.type == "tool_use": output = run_bash(block.input["command"]) results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) messages.append({"role": "user", "content": results}) ``` 组装为一个完整函数: ```python def agent_loop(query): messages = [{"role": "user", "content": query}] while True: response = client.messages.create( model=MODEL, system=SYSTEM, messages=messages, tools=TOOLS, max_tokens=8000, ) messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": return results = [] for block in response.content: if block.type == "tool_use": output = run_bash(block.input["command"]) results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) messages.append({"role": "user", "content": results}) ``` 不到 30 行, 这就是整个智能体。后面 11 个章节都在这个循环上叠加机制 -- 循环本身始终不变。 ## 变更内容 | 组件 | 之前 | 之后 | |---------------|------------|--------------------------------| | Agent loop | (无) | `while True` + stop_reason | | Tools | (无) | `bash` (单一工具) | | Messages | (无) | 累积式消息列表 | | Control flow | (无) | `stop_reason != "tool_use"` | ## 试一试 ```sh cd learn-claude-code python agents/s01_agent_loop.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Create a file called hello.py that prints "Hello, World!"` 2. `List all Python files in this directory` 3. `What is the current git branch?` 4. 
`Create a directory called test_output and write 3 files in it` ================================================ FILE: docs/zh/s02-tool-use.md ================================================ # s02: Tool Use (工具使用) `s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"加一个工具, 只加一个 handler"* -- 循环不用动, 新工具注册进 dispatch map 就行。 > > **Harness 层**: 工具分发 -- 扩展模型能触达的边界。 ## 问题 只有 `bash` 时, 所有操作都走 shell。`cat` 截断不可预测, `sed` 遇到特殊字符就崩, 每次 bash 调用都是不受约束的安全面。专用工具 (`read_file`, `write_file`) 可以在工具层面做路径沙箱。 关键洞察: 加工具不需要改循环。 ## 解决方案 ``` +--------+ +-------+ +------------------+ | User | ---> | LLM | ---> | Tool Dispatch | | prompt | | | | { | +--------+ +---+---+ | bash: run_bash | ^ | read: run_read | | | write: run_wr | +-----------+ edit: run_edit | tool_result | } | +------------------+ The dispatch map is a dict: {tool_name: handler_function}. One lookup replaces any if/elif chain. ``` ## 工作原理 1. 每个工具有一个处理函数。路径沙箱防止逃逸工作区。 ```python def safe_path(p: str) -> Path: path = (WORKDIR / p).resolve() if not path.is_relative_to(WORKDIR): raise ValueError(f"Path escapes workspace: {p}") return path def run_read(path: str, limit: int = None) -> str: text = safe_path(path).read_text() lines = text.splitlines() if limit and limit < len(lines): lines = lines[:limit] return "\n".join(lines)[:50000] ``` 2. dispatch map 将工具名映射到处理函数。 ```python TOOL_HANDLERS = { "bash": lambda **kw: run_bash(kw["command"]), "read_file": lambda **kw: run_read(kw["path"], kw.get("limit")), "write_file": lambda **kw: run_write(kw["path"], kw["content"]), "edit_file": lambda **kw: run_edit(kw["path"], kw["old_text"], kw["new_text"]), } ``` 3. 
循环中按名称查找处理函数。循环体本身与 s01 完全一致。 ```python for block in response.content: if block.type == "tool_use": handler = TOOL_HANDLERS.get(block.name) output = handler(**block.input) if handler \ else f"Unknown tool: {block.name}" results.append({ "type": "tool_result", "tool_use_id": block.id, "content": output, }) ``` 加工具 = 加 handler + 加 schema。循环永远不变。 ## 相对 s01 的变更 | 组件 | 之前 (s01) | 之后 (s02) | |----------------|--------------------|--------------------------------| | Tools | 1 (仅 bash) | 4 (bash, read, write, edit) | | Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 | | 路径安全 | 无 | `safe_path()` 沙箱 | | Agent loop | 不变 | 不变 | ## 试一试 ```sh cd learn-claude-code python agents/s02_tool_use.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Read the file requirements.txt` 2. `Create a file called greet.py with a greet(name) function` 3. `Edit greet.py to add a docstring to the function` 4. `Read greet.py to verify the edit worked` ================================================ FILE: docs/zh/s03-todo-write.md ================================================ # s03: TodoWrite (待办写入) `s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"没有计划的 agent 走哪算哪"* -- 先列步骤再动手, 完成率翻倍。 > > **Harness 层**: 规划 -- 让模型不偏航, 但不替它画航线。 ## 问题 多步任务中, 模型会丢失进度 -- 重复做过的事、跳步、跑偏。对话越长越严重: 工具结果不断填满上下文, 系统提示的影响力逐渐被稀释。一个 10 步重构可能做完 1-3 步就开始即兴发挥, 因为 4-10 步已经被挤出注意力了。 ## 解决方案 ``` +--------+ +-------+ +---------+ | User | ---> | LLM | ---> | Tools | | prompt | | | | + todo | +--------+ +---+---+ +----+----+ ^ | | tool_result | +----------------+ | +-----------+-----------+ | TodoManager state | | [ ] task A | | [>] task B <- doing | | [x] task C | +-----------------------+ | if rounds_since_todo >= 3: inject into tool_result ``` ## 工作原理 1. 
TodoManager 存储带状态的项目。同一时间只允许一个 `in_progress`。 ```python class TodoManager: def update(self, items: list) -> str: validated, in_progress_count = [], 0 for item in items: status = item.get("status", "pending") if status == "in_progress": in_progress_count += 1 validated.append({"id": item["id"], "text": item["text"], "status": status}) if in_progress_count > 1: raise ValueError("Only one task can be in_progress") self.items = validated return self.render() ``` 2. `todo` 工具和其他工具一样加入 dispatch map。 ```python TOOL_HANDLERS = { # ...base tools... "todo": lambda **kw: TODO.update(kw["items"]), } ``` 3. nag reminder: 模型连续 3 轮以上不调用 `todo` 时注入提醒。 ```python if rounds_since_todo >= 3 and messages: last = messages[-1] if last["role"] == "user" and isinstance(last.get("content"), list): last["content"].insert(0, { "type": "text", "text": "<reminder>Update your todos.</reminder>", }) ``` "同时只能有一个 in_progress" 强制顺序聚焦。nag reminder 制造问责压力 -- 你不更新计划, 系统就追着你问。 ## 相对 s02 的变更 | 组件 | 之前 (s02) | 之后 (s03) | |----------------|------------------|--------------------------------| | Tools | 4 | 5 (+todo) | | 规划 | 无 | 带状态的 TodoManager | | Nag 注入 | 无 | 3 轮后注入 `<reminder>` | | Agent loop | 简单分发 | + rounds_since_todo 计数器 | ## 试一试 ```sh cd learn-claude-code python agents/s03_todo_write.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Refactor the file hello.py: add type hints, docstrings, and a main guard` 2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py` 3. `Review all Python files and fix any style issues` ================================================ FILE: docs/zh/s04-subagent.md ================================================ # s04: Subagents (子智能体) `s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"大任务拆小, 每个小任务干净的上下文"* -- 子智能体用独立 messages[], 不污染主对话。 > > **Harness 层**: 上下文隔离 -- 守护模型的思维清晰度。 ## 问题 智能体工作越久, messages 数组越胖。每次读文件、跑命令的输出都永久留在上下文里。"这个项目用什么测试框架?"
可能要读 5 个文件, 但父智能体只需要一个词: "pytest。" ## 解决方案 ``` Parent agent Subagent +------------------+ +------------------+ | messages=[...] | | messages=[] | <-- fresh | | dispatch | | | tool: task | ----------> | while tool_use: | | prompt="..." | | call tools | | | summary | append results | | result = "..." | <---------- | return last text | +------------------+ +------------------+ Parent context stays clean. Subagent context is discarded. ``` ## 工作原理 1. 父智能体有一个 `task` 工具。子智能体拥有除 `task` 外的所有基础工具 (禁止递归生成)。 ```python PARENT_TOOLS = CHILD_TOOLS + [ {"name": "task", "description": "Spawn a subagent with fresh context.", "input_schema": { "type": "object", "properties": {"prompt": {"type": "string"}}, "required": ["prompt"], }}, ] ``` 2. 子智能体以 `messages=[]` 启动, 运行自己的循环。只有最终文本返回给父智能体。 ```python def run_subagent(prompt: str) -> str: sub_messages = [{"role": "user", "content": prompt}] for _ in range(30): # safety limit response = client.messages.create( model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages, tools=CHILD_TOOLS, max_tokens=8000, ) sub_messages.append({"role": "assistant", "content": response.content}) if response.stop_reason != "tool_use": break results = [] for block in response.content: if block.type == "tool_use": handler = TOOL_HANDLERS.get(block.name) output = handler(**block.input) results.append({"type": "tool_result", "tool_use_id": block.id, "content": str(output)[:50000]}) sub_messages.append({"role": "user", "content": results}) return "".join( b.text for b in response.content if hasattr(b, "text") ) or "(no summary)" ``` 子智能体可能跑了 30+ 次工具调用, 但整个消息历史直接丢弃。父智能体收到的只是一段摘要文本, 作为普通 `tool_result` 返回。 ## 相对 s03 的变更 | 组件 | 之前 (s03) | 之后 (s04) | |----------------|------------------|-------------------------------| | Tools | 5 | 5 (基础) + task (仅父端) | | 上下文 | 单一共享 | 父 + 子隔离 | | Subagent | 无 | `run_subagent()` 函数 | | 返回值 | 不适用 | 仅摘要文本 | ## 试一试 ```sh cd learn-claude-code python agents/s04_subagent.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. 
`Use a subtask to find what testing framework this project uses` 2. `Delegate: read all .py files and summarize what each one does` 3. `Use a task to create a new module, then verify it from here` ================================================ FILE: docs/zh/s05-skill-loading.md ================================================ # s05: Skills (技能加载) `s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12` > *"用到什么知识, 临时加载什么知识"* -- 通过 tool_result 注入, 不塞 system prompt。 > > **Harness 层**: 按需知识 -- 模型开口要时才给的领域专长。 ## 问题 你希望智能体遵循特定领域的工作流: git 约定、测试模式、代码审查清单。全塞进系统提示太浪费 -- 10 个技能, 每个 2000 token, 就是 20,000 token, 大部分跟当前任务毫无关系。 ## 解决方案 ``` System prompt (Layer 1 -- always present): +--------------------------------------+ | You are a coding agent. | | Skills available: | | - git: Git workflow helpers | ~100 tokens/skill | - test: Testing best practices | +--------------------------------------+ When model calls load_skill("git"): +--------------------------------------+ | tool_result (Layer 2 -- on demand): | | | | Full git workflow instructions... | ~2000 tokens | Step 1: ... | | | +--------------------------------------+ ``` 第一层: 系统提示中放技能名称 (低成本)。第二层: tool_result 中按需放完整内容。 ## 工作原理 1. 每个技能是一个目录, 包含 `SKILL.md` 文件和 YAML frontmatter。 ``` skills/ pdf/ SKILL.md # ---\n name: pdf\n description: Process PDF files\n ---\n ... code-review/ SKILL.md # ---\n name: code-review\n description: Review code\n ---\n ... ``` 2. 
SkillLoader 递归扫描 `SKILL.md` 文件, 用目录名作为技能标识。 ```python class SkillLoader: def __init__(self, skills_dir: Path): self.skills = {} for f in sorted(skills_dir.rglob("SKILL.md")): text = f.read_text() meta, body = self._parse_frontmatter(text) name = meta.get("name", f.parent.name) self.skills[name] = {"meta": meta, "body": body} def get_descriptions(self) -> str: lines = [] for name, skill in self.skills.items(): desc = skill["meta"].get("description", "") lines.append(f" - {name}: {desc}") return "\n".join(lines) def get_content(self, name: str) -> str: skill = self.skills.get(name) if not skill: return f"Error: Unknown skill '{name}'." return f"\n{skill['body']}\n" ``` 3. 第一层写入系统提示。第二层不过是 dispatch map 中的又一个工具。 ```python SYSTEM = f"""You are a coding agent at {WORKDIR}. Skills available: {SKILL_LOADER.get_descriptions()}""" TOOL_HANDLERS = { # ...base tools... "load_skill": lambda **kw: SKILL_LOADER.get_content(kw["name"]), } ``` 模型知道有哪些技能 (便宜), 需要时再加载完整内容 (贵)。 ## 相对 s04 的变更 | 组件 | 之前 (s04) | 之后 (s05) | |----------------|------------------|--------------------------------| | Tools | 5 (基础 + task) | 5 (基础 + load_skill) | | 系统提示 | 静态字符串 | + 技能描述列表 | | 知识库 | 无 | skills/\*/SKILL.md 文件 | | 注入方式 | 无 | 两层 (系统提示 + result) | ## 试一试 ```sh cd learn-claude-code python agents/s05_skill_loading.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `What skills are available?` 2. `Load the agent-builder skill and follow its instructions` 3. `I need to do a code review -- load the relevant skill first` 4. 
`Build an MCP server using the mcp-builder skill` ================================================ FILE: docs/zh/s06-context-compact.md ================================================ # s06: Context Compact (上下文压缩) `s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12` > *"上下文总会满, 要有办法腾地方"* -- 三层压缩策略, 换来无限会话。 > > **Harness 层**: 压缩 -- 干净的记忆, 无限的会话。 ## 问题 上下文窗口是有限的。读一个 1000 行的文件就吃掉 ~4000 token; 读 30 个文件、跑 20 条命令, 轻松突破 100k token。不压缩, 智能体根本没法在大项目里干活。 ## 解决方案 三层压缩, 激进程度递增: ``` Every turn: +------------------+ | Tool call result | +------------------+ | v [Layer 1: micro_compact] (silent, every turn) Replace tool_result > 3 turns old with "[Previous: used {tool_name}]" | v [Check: tokens > 50000?] | | no yes | | v v continue [Layer 2: auto_compact] Save transcript to .transcripts/ LLM summarizes conversation. Replace all messages with [summary]. | v [Layer 3: compact tool] Model calls compact explicitly. Same summarization as auto_compact. ``` ## 工作原理 1. **第一层 -- micro_compact**: 每次 LLM 调用前, 将旧的 tool result 替换为占位符。 ```python def micro_compact(messages: list) -> list: tool_results = [] for i, msg in enumerate(messages): if msg["role"] == "user" and isinstance(msg.get("content"), list): for j, part in enumerate(msg["content"]): if isinstance(part, dict) and part.get("type") == "tool_result": tool_results.append((i, j, part)) if len(tool_results) <= KEEP_RECENT: return messages for _, _, part in tool_results[:-KEEP_RECENT]: if len(part.get("content", "")) > 100: part["content"] = f"[Previous: used {tool_name}]" return messages ``` 2. 
**第二层 -- auto_compact**: token 超过阈值时, 保存完整对话到磁盘, 让 LLM 做摘要。 ```python def auto_compact(messages: list) -> list: # Save transcript for recovery transcript_path = TRANSCRIPT_DIR / f"transcript_{int(time.time())}.jsonl" with open(transcript_path, "w") as f: for msg in messages: f.write(json.dumps(msg, default=str) + "\n") # LLM summarizes response = client.messages.create( model=MODEL, messages=[{"role": "user", "content": "Summarize this conversation for continuity..." + json.dumps(messages, default=str)[:80000]}], max_tokens=2000, ) return [ {"role": "user", "content": f"[Compressed]\n\n{response.content[0].text}"}, {"role": "assistant", "content": "Understood. Continuing."}, ] ``` 3. **第三层 -- manual compact**: `compact` 工具按需触发同样的摘要机制。 4. 循环整合三层: ```python def agent_loop(messages: list): while True: micro_compact(messages) # Layer 1 if estimate_tokens(messages) > THRESHOLD: messages[:] = auto_compact(messages) # Layer 2 response = client.messages.create(...) # ... tool execution ... if manual_compact: messages[:] = auto_compact(messages) # Layer 3 ``` 完整历史通过 transcript 保存在磁盘上。信息没有真正丢失, 只是移出了活跃上下文。 ## 相对 s05 的变更 | 组件 | 之前 (s05) | 之后 (s06) | |----------------|------------------|--------------------------------| | Tools | 5 | 5 (基础 + compact) | | 上下文管理 | 无 | 三层压缩 | | Micro-compact | 无 | 旧结果 -> 占位符 | | Auto-compact | 无 | token 阈值触发 | | Transcripts | 无 | 保存到 .transcripts/ | ## 试一试 ```sh cd learn-claude-code python agents/s06_context_compact.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Read every Python file in the agents/ directory one by one` (观察 micro-compact 替换旧结果) 2. `Keep reading files until compression triggers automatically` 3. 
`Use the compact tool to manually compress the conversation` ================================================ FILE: docs/zh/s07-task-system.md ================================================ # s07: Task System (任务系统) `s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12` > *"大目标要拆成小任务, 排好序, 记在磁盘上"* -- 文件持久化的任务图, 为多 agent 协作打基础。 > > **Harness 层**: 持久化任务 -- 比任何一次对话都长命的目标。 ## 问题 s03 的 TodoManager 只是内存中的扁平清单: 没有顺序、没有依赖、状态只有做完没做完。真实目标是有结构的 -- 任务 B 依赖任务 A, 任务 C 和 D 可以并行, 任务 E 要等 C 和 D 都完成。 没有显式的关系, 智能体分不清什么能做、什么被卡住、什么能同时跑。而且清单只活在内存里, 上下文压缩 (s06) 一跑就没了。 ## 解决方案 把扁平清单升级为持久化到磁盘的**任务图**。每个任务是一个 JSON 文件, 有状态、前置依赖 (`blockedBy`) 和后置依赖 (`blocks`)。任务图随时回答三个问题: - **什么可以做?** -- 状态为 `pending` 且 `blockedBy` 为空的任务。 - **什么被卡住?** -- 等待前置任务完成的任务。 - **什么做完了?** -- 状态为 `completed` 的任务, 完成时自动解锁后续任务。 ``` .tasks/ task_1.json {"id":1, "status":"completed"} task_2.json {"id":2, "blockedBy":[1], "status":"pending"} task_3.json {"id":3, "blockedBy":[1], "status":"pending"} task_4.json {"id":4, "blockedBy":[2,3], "status":"pending"} 任务图 (DAG): +----------+ +--> | task 2 | --+ | | pending | | +----------+ +----------+ +--> +----------+ | task 1 | | task 4 | | completed| --> +----------+ +--> | blocked | +----------+ | task 3 | --+ +----------+ | pending | +----------+ 顺序: task 1 必须先完成, 才能开始 2 和 3 并行: task 2 和 3 可以同时执行 依赖: task 4 要等 2 和 3 都完成 状态: pending -> in_progress -> completed ``` 这个任务图是 s07 之后所有机制的协调骨架: 后台执行 (s08)、多 agent 团队 (s09+)、worktree 隔离 (s12) 都读写这同一个结构。 ## 工作原理 1. **TaskManager**: 每个任务一个 JSON 文件, CRUD + 依赖图。 ```python class TaskManager: def __init__(self, tasks_dir: Path): self.dir = tasks_dir self.dir.mkdir(exist_ok=True) self._next_id = self._max_id() + 1 def create(self, subject, description=""): task = {"id": self._next_id, "subject": subject, "status": "pending", "blockedBy": [], "blocks": [], "owner": ""} self._save(task) self._next_id += 1 return json.dumps(task, indent=2) ``` 2. 
**依赖解除**: 完成任务时, 自动将其 ID 从其他任务的 `blockedBy` 中移除, 解锁后续任务。 ```python def _clear_dependency(self, completed_id): for f in self.dir.glob("task_*.json"): task = json.loads(f.read_text()) if completed_id in task.get("blockedBy", []): task["blockedBy"].remove(completed_id) self._save(task) ``` 3. **状态变更 + 依赖关联**: `update` 处理状态转换和依赖边。 ```python def update(self, task_id, status=None, add_blocked_by=None, add_blocks=None): task = self._load(task_id) if status: task["status"] = status if status == "completed": self._clear_dependency(task_id) self._save(task) ``` 4. 四个任务工具加入 dispatch map。 ```python TOOL_HANDLERS = { # ...base tools... "task_create": lambda **kw: TASKS.create(kw["subject"]), "task_update": lambda **kw: TASKS.update(kw["task_id"], kw.get("status")), "task_list": lambda **kw: TASKS.list_all(), "task_get": lambda **kw: TASKS.get(kw["task_id"]), } ``` 从 s07 起, 任务图是多步工作的默认选择。s03 的 Todo 仍可用于单次会话内的快速清单。 ## 相对 s06 的变更 | 组件 | 之前 (s06) | 之后 (s07) | |---|---|---| | Tools | 5 | 8 (`task_create/update/list/get`) | | 规划模型 | 扁平清单 (仅内存) | 带依赖关系的任务图 (磁盘) | | 关系 | 无 | `blockedBy` + `blocks` 边 | | 状态追踪 | 做完没做完 | `pending` -> `in_progress` -> `completed` | | 持久化 | 压缩后丢失 | 压缩和重启后存活 | ## 试一试 ```sh cd learn-claude-code python agents/s07_task_system.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Create 3 tasks: "Setup project", "Write code", "Write tests". Make them depend on each other in order.` 2. `List all tasks and show the dependency graph` 3. `Complete task 1 and then list tasks to see task 2 unblocked` 4. 
`Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse` ================================================ FILE: docs/zh/s08-background-tasks.md ================================================ # s08: Background Tasks (后台任务) `s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12` > *"慢操作丢后台, agent 继续想下一步"* -- 后台线程跑命令, 完成后注入通知。 > > **Harness 层**: 后台执行 -- 模型继续思考, harness 负责等待。 ## 问题 有些命令要跑好几分钟: `npm install`、`pytest`、`docker build`。阻塞式循环下模型只能干等。用户说 "装依赖, 顺便建个配置文件", 智能体却只能一个一个来。 ## 解决方案 ``` Main thread Background thread +-----------------+ +-----------------+ | agent loop | | subprocess runs | | ... | | ... | | [LLM call] <---+------- | enqueue(result) | | ^drain queue | +-----------------+ +-----------------+ Timeline: Agent --[spawn A]--[spawn B]--[other work]---- | | v v [A runs] [B runs] (parallel) | | +-- results injected before next LLM call --+ ``` ## 工作原理 1. BackgroundManager 用线程安全的通知队列追踪任务。 ```python class BackgroundManager: def __init__(self): self.tasks = {} self._notification_queue = [] self._lock = threading.Lock() ``` 2. `run()` 启动守护线程, 立即返回。 ```python def run(self, command: str) -> str: task_id = str(uuid.uuid4())[:8] self.tasks[task_id] = {"status": "running", "command": command} thread = threading.Thread( target=self._execute, args=(task_id, command), daemon=True) thread.start() return f"Background task {task_id} started" ``` 3. 子进程完成后, 结果进入通知队列。 ```python def _execute(self, task_id, command): try: r = subprocess.run(command, shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=300) output = (r.stdout + r.stderr).strip()[:50000] except subprocess.TimeoutExpired: output = "Error: Timeout (300s)" with self._lock: self._notification_queue.append({ "task_id": task_id, "result": output[:500]}) ``` 4. 
每次 LLM 调用前排空通知队列。 ```python def agent_loop(messages: list): while True: notifs = BG.drain_notifications() if notifs: notif_text = "\n".join( f"[bg:{n['task_id']}] {n['result']}" for n in notifs) messages.append({"role": "user", "content": f"<background-results>\n{notif_text}\n" f"</background-results>"}) messages.append({"role": "assistant", "content": "Noted background results."}) response = client.messages.create(...) ``` 循环保持单线程。只有子进程 I/O 被并行化。 ## 相对 s07 的变更 | 组件 | 之前 (s07) | 之后 (s08) | |----------------|------------------|------------------------------------| | Tools | 8 | 6 (基础 + background_run + check) | | 执行方式 | 仅阻塞 | 阻塞 + 后台线程 | | 通知机制 | 无 | 每轮排空的队列 | | 并发 | 无 | 守护线程 | ## 试一试 ```sh cd learn-claude-code python agents/s08_background_tasks.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Run "sleep 5 && echo done" in the background, then create a file while it runs` 2. `Start 3 background tasks: "sleep 2", "sleep 4", "sleep 6". Check their status.` 3. `Run pytest in the background and keep working on other things` ================================================ FILE: docs/zh/s09-agent-teams.md ================================================ # s09: Agent Teams (智能体团队) `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12` > *"任务太大一个人干不完, 要能分给队友"* -- 持久化队友 + JSONL 邮箱。 > > **Harness 层**: 团队邮箱 -- 多个模型, 通过文件协调。 ## 问题 子智能体 (s04) 是一次性的: 生成、干活、返回摘要、消亡。没有身份, 没有跨调用的记忆。后台任务 (s08) 能跑 shell 命令, 但做不了 LLM 引导的决策。 真正的团队协作需要三样东西: (1) 能跨多轮对话存活的持久智能体, (2) 身份和生命周期管理, (3) 智能体之间的通信通道。 ## 解决方案 ``` Teammate lifecycle: spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN Communication: .team/ config.json <- team roster + statuses inbox/ alice.jsonl <- append-only, drain-on-read bob.jsonl lead.jsonl +--------+ send("alice","bob","...") +--------+ | alice | -----------------------------> | bob | | loop | bob.jsonl << {json_line} | loop | +--------+ +--------+ ^ | | BUS.read_inbox("alice") | +---- alice.jsonl -> read + drain ---------+ ``` ## 工作原理 1.
TeammateManager 通过 config.json 维护团队名册。 ```python class TeammateManager: def __init__(self, team_dir: Path): self.dir = team_dir self.dir.mkdir(exist_ok=True) self.config_path = self.dir / "config.json" self.config = self._load_config() self.threads = {} ``` 2. `spawn()` 创建队友并在线程中启动 agent loop。 ```python def spawn(self, name: str, role: str, prompt: str) -> str: member = {"name": name, "role": role, "status": "working"} self.config["members"].append(member) self._save_config() thread = threading.Thread( target=self._teammate_loop, args=(name, role, prompt), daemon=True) thread.start() return f"Spawned teammate '{name}' (role: {role})" ``` 3. MessageBus: append-only 的 JSONL 收件箱。`send()` 追加一行; `read_inbox()` 读取全部并清空。 ```python class MessageBus: def send(self, sender, to, content, msg_type="message", extra=None): msg = {"type": msg_type, "from": sender, "content": content, "timestamp": time.time()} if extra: msg.update(extra) with open(self.dir / f"{to}.jsonl", "a") as f: f.write(json.dumps(msg) + "\n") def read_inbox(self, name): path = self.dir / f"{name}.jsonl" if not path.exists(): return "[]" msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l] path.write_text("") # drain return json.dumps(msgs, indent=2) ``` 4. 每个队友在每次 LLM 调用前检查收件箱, 将消息注入上下文。 ```python def _teammate_loop(self, name, role, prompt): messages = [{"role": "user", "content": prompt}] for _ in range(50): inbox = BUS.read_inbox(name) if inbox != "[]": messages.append({"role": "user", "content": f"{inbox}"}) messages.append({"role": "assistant", "content": "Noted inbox messages."}) response = client.messages.create(...) if response.stop_reason != "tool_use": break # execute tools, append results... 
self._find_member(name)["status"] = "idle" ``` ## 相对 s08 的变更 | 组件 | 之前 (s08) | 之后 (s09) | |----------------|------------------|------------------------------------| | Tools | 6 | 9 (+spawn/send/read_inbox) | | 智能体数量 | 单一 | 领导 + N 个队友 | | 持久化 | 无 | config.json + JSONL 收件箱 | | 线程 | 后台命令 | 每线程完整 agent loop | | 生命周期 | 一次性 | idle -> working -> idle | | 通信 | 无 | message + broadcast | ## 试一试 ```sh cd learn-claude-code python agents/s09_agent_teams.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.` 2. `Broadcast "status update: phase 1 complete" to all teammates` 3. `Check the lead inbox for any messages` 4. 输入 `/team` 查看团队名册和状态 5. 输入 `/inbox` 手动检查领导的收件箱 ================================================ FILE: docs/zh/s10-team-protocols.md ================================================ # s10: Team Protocols (团队协议) `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12` > *"队友之间要有统一的沟通规矩"* -- 一个 request-response 模式驱动所有协商。 > > **Harness 层**: 协议 -- 模型之间的结构化握手。 ## 问题 s09 中队友能干活能通信, 但缺少结构化协调: **关机**: 直接杀线程会留下写了一半的文件和过期的 config.json。需要握手 -- 领导请求, 队友批准 (收尾退出) 或拒绝 (继续干)。 **计划审批**: 领导说 "重构认证模块", 队友立刻开干。高风险变更应该先过审。 两者结构一样: 一方发带唯一 ID 的请求, 另一方引用同一 ID 响应。 ## 解决方案 ``` Shutdown Protocol Plan Approval Protocol ================== ====================== Lead Teammate Teammate Lead | | | | |--shutdown_req-->| |--plan_req------>| | {req_id:"abc"} | | {req_id:"xyz"} | | | | | |<--shutdown_resp-| |<--plan_resp-----| | {req_id:"abc", | | {req_id:"xyz", | | approve:true} | | approve:true} | Shared FSM: [pending] --approve--> [approved] [pending] --reject---> [rejected] Trackers: shutdown_requests = {req_id: {target, status}} plan_requests = {req_id: {from, plan, status}} ``` ## 工作原理 1. 
领导生成 request_id, 通过收件箱发起关机请求。 ```python shutdown_requests = {} def handle_shutdown_request(teammate: str) -> str: req_id = str(uuid.uuid4())[:8] shutdown_requests[req_id] = {"target": teammate, "status": "pending"} BUS.send("lead", teammate, "Please shut down gracefully.", "shutdown_request", {"request_id": req_id}) return f"Shutdown request {req_id} sent (status: pending)" ``` 2. 队友收到请求后, 用 approve/reject 响应。 ```python if tool_name == "shutdown_response": req_id = args["request_id"] approve = args["approve"] shutdown_requests[req_id]["status"] = "approved" if approve else "rejected" BUS.send(sender, "lead", args.get("reason", ""), "shutdown_response", {"request_id": req_id, "approve": approve}) ``` 3. 计划审批遵循完全相同的模式。队友提交计划 (生成 request_id), 领导审查 (引用同一个 request_id)。 ```python plan_requests = {} def handle_plan_review(request_id, approve, feedback=""): req = plan_requests[request_id] req["status"] = "approved" if approve else "rejected" BUS.send("lead", req["from"], feedback, "plan_approval_response", {"request_id": request_id, "approve": approve}) ``` 一个 FSM, 两种用途。同样的 `pending -> approved | rejected` 状态机可以套用到任何请求-响应协议上。 ## 相对 s09 的变更 | 组件 | 之前 (s09) | 之后 (s10) | |----------------|------------------|--------------------------------------| | Tools | 9 | 12 (+shutdown_req/resp +plan) | | 关机 | 仅自然退出 | 请求-响应握手 | | 计划门控 | 无 | 提交/审查与审批 | | 关联 | 无 | 每个请求一个 request_id | | FSM | 无 | pending -> approved/rejected | ## 试一试 ```sh cd learn-claude-code python agents/s10_team_protocols.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Spawn alice as a coder. Then request her shutdown.` 2. `List teammates to see alice's status after shutdown approval` 3. `Spawn bob with a risky refactoring task. Review and reject his plan.` 4. `Spawn charlie, have him submit a plan, then approve it.` 5. 
输入 `/team` 监控状态 ================================================ FILE: docs/zh/s11-autonomous-agents.md ================================================ # s11: Autonomous Agents (自治智能体) `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12` > *"队友自己看看板, 有活就认领"* -- 不需要领导逐个分配, 自组织。 > > **Harness 层**: 自治 -- 模型自己找活干, 无需指派。 ## 问题 s09-s10 中, 队友只在被明确指派时才动。领导得给每个队友写 prompt, 任务看板上 10 个未认领的任务得手动分配。这扩展不了。 真正的自治: 队友自己扫描任务看板, 认领没人做的任务, 做完再找下一个。 一个细节: 上下文压缩 (s06) 后智能体可能忘了自己是谁。身份重注入解决这个问题。 ## 解决方案 ``` Teammate lifecycle with idle cycle: +-------+ | spawn | +---+---+ | v +-------+ tool_use +-------+ | WORK | <------------- | LLM | +---+---+ +-------+ | | stop_reason != tool_use (or idle tool called) v +--------+ | IDLE | poll every 5s for up to 60s +---+----+ | +---> check inbox --> message? ----------> WORK | +---> scan .tasks/ --> unclaimed? -------> claim -> WORK | +---> 60s timeout ----------------------> SHUTDOWN Identity re-injection after compression: if len(messages) <= 3: messages.insert(0, identity_block) ``` ## 工作原理 1. 队友循环分两个阶段: WORK 和 IDLE。LLM 停止调用工具 (或调用了 `idle`) 时, 进入 IDLE。 ```python def _loop(self, name, role, prompt): while True: # -- WORK PHASE -- messages = [{"role": "user", "content": prompt}] for _ in range(50): response = client.messages.create(...) if response.stop_reason != "tool_use": break # execute tools... if idle_requested: break # -- IDLE PHASE -- self._set_status(name, "idle") resume = self._idle_poll(name, messages) if not resume: self._set_status(name, "shutdown") return self._set_status(name, "working") ``` 2. 
空闲阶段循环轮询收件箱和任务看板。 ```python def _idle_poll(self, name, messages): for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12 time.sleep(POLL_INTERVAL) inbox = BUS.read_inbox(name) if inbox: messages.append({"role": "user", "content": f"{inbox}"}) return True unclaimed = scan_unclaimed_tasks() if unclaimed: claim_task(unclaimed[0]["id"], name) messages.append({"role": "user", "content": f"Task #{unclaimed[0]['id']}: " f"{unclaimed[0]['subject']}"}) return True return False # timeout -> shutdown ``` 3. 任务看板扫描: 找 pending 状态、无 owner、未被阻塞的任务。 ```python def scan_unclaimed_tasks() -> list: unclaimed = [] for f in sorted(TASKS_DIR.glob("task_*.json")): task = json.loads(f.read_text()) if (task.get("status") == "pending" and not task.get("owner") and not task.get("blockedBy")): unclaimed.append(task) return unclaimed ``` 4. 身份重注入: 上下文过短 (说明发生了压缩) 时, 在开头插入身份块。 ```python if len(messages) <= 3: messages.insert(0, {"role": "user", "content": f"You are '{name}', role: {role}, " f"team: {team_name}. Continue your work."}) messages.insert(1, {"role": "assistant", "content": f"I am {name}. Continuing."}) ``` ## 相对 s10 的变更 | 组件 | 之前 (s10) | 之后 (s11) | |----------------|------------------|----------------------------------| | Tools | 12 | 14 (+idle, +claim_task) | | 自治性 | 领导指派 | 自组织 | | 空闲阶段 | 无 | 轮询收件箱 + 任务看板 | | 任务认领 | 仅手动 | 自动认领未分配任务 | | 身份 | 系统提示 | + 压缩后重注入 | | 超时 | 无 | 60 秒空闲 -> 自动关机 | ## 试一试 ```sh cd learn-claude-code python agents/s11_autonomous_agents.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.` 2. `Spawn a coder teammate and let it find work from the task board itself` 3. `Create tasks with dependencies. Watch teammates respect the blocked order.` 4. 输入 `/tasks` 查看带 owner 的任务看板 5. 
输入 `/team` 监控谁在工作、谁在空闲 ================================================ FILE: docs/zh/s12-worktree-task-isolation.md ================================================ # s12: Worktree + Task Isolation (Worktree 任务隔离) `s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]` > *"各干各的目录, 互不干扰"* -- 任务管目标, worktree 管目录, 按 ID 绑定。 > > **Harness 层**: 目录隔离 -- 永不碰撞的并行执行通道。 ## 问题 到 s11, 智能体已经能自主认领和完成任务。但所有任务共享一个目录。两个智能体同时重构不同模块 -- A 改 `config.py`, B 也改 `config.py`, 未提交的改动互相污染, 谁也没法干净回滚。 任务板管 "做什么" 但不管 "在哪做"。解法: 给每个任务一个独立的 git worktree 目录, 用任务 ID 把两边关联起来。 ## 解决方案 ``` Control plane (.tasks/) Execution plane (.worktrees/) +------------------+ +------------------------+ | task_1.json | | auth-refactor/ | | status: in_progress <------> branch: wt/auth-refactor | worktree: "auth-refactor" | task_id: 1 | +------------------+ +------------------------+ | task_2.json | | ui-login/ | | status: pending <------> branch: wt/ui-login | worktree: "ui-login" | task_id: 2 | +------------------+ +------------------------+ | index.json (worktree registry) events.jsonl (lifecycle log) State machines: Task: pending -> in_progress -> completed Worktree: absent -> active -> removed | kept ``` ## 工作原理 1. **创建任务。** 先把目标持久化。 ```python TASKS.create("Implement auth refactor") # -> .tasks/task_1.json status=pending worktree="" ``` 2. **创建 worktree 并绑定任务。** 传入 `task_id` 自动将任务推进到 `in_progress`。 ```python WORKTREES.create("auth-refactor", task_id=1) # -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD # -> index.json gets new entry, task_1.json gets worktree="auth-refactor" ``` 绑定同时写入两侧状态: ```python def bind_worktree(self, task_id, worktree): task = self._load(task_id) task["worktree"] = worktree if task["status"] == "pending": task["status"] = "in_progress" self._save(task) ``` 3. **在 worktree 中执行命令。** `cwd` 指向隔离目录。 ```python subprocess.run(command, shell=True, cwd=worktree_path, capture_output=True, text=True, timeout=300) ``` 4. 
**收尾。** 两种选择: - `worktree_keep(name)` -- 保留目录供后续使用。 - `worktree_remove(name, complete_task=True)` -- 删除目录, 完成绑定任务, 发出事件。一个调用搞定拆除 + 完成。 ```python def remove(self, name, force=False, complete_task=False): self._run_git(["worktree", "remove", wt["path"]]) if complete_task and wt.get("task_id") is not None: self.tasks.update(wt["task_id"], status="completed") self.tasks.unbind_worktree(wt["task_id"]) self.events.emit("task.completed", ...) ``` 5. **事件流。** 每个生命周期步骤写入 `.worktrees/events.jsonl`: ```json { "event": "worktree.remove.after", "task": {"id": 1, "status": "completed"}, "worktree": {"name": "auth-refactor", "status": "removed"}, "ts": 1730000000 } ``` 事件类型: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。 崩溃后从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的; 磁盘状态是持久的。 ## 相对 s11 的变更 | 组件 | 之前 (s11) | 之后 (s12) | |--------------------|----------------------------|----------------------------------------------| | 协调 | 任务板 (owner/status) | 任务板 + worktree 显式绑定 | | 执行范围 | 共享目录 | 每个任务独立目录 | | 可恢复性 | 仅任务状态 | 任务状态 + worktree 索引 | | 收尾 | 任务完成 | 任务完成 + 显式 keep/remove | | 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 | ## 试一试 ```sh cd learn-claude-code python agents/s12_worktree_task_isolation.py ``` 试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文): 1. `Create tasks for backend auth and frontend login page, then list tasks.` 2. `Create worktree "auth-refactor" for task 1, then bind task 2 to a new worktree "ui-login".` 3. `Run "git status --short" in worktree "auth-refactor".` 4. `Keep worktree "ui-login", then list worktrees and inspect events.` 5. 
`Remove worktree "auth-refactor" with complete_task=true, then list tasks/worktrees/events.` ================================================ FILE: requirements.txt ================================================ anthropic>=0.25.0 python-dotenv>=1.0.0 ================================================ FILE: skills/agent-builder/SKILL.md ================================================ --- name: agent-builder description: | Design and build AI agents for any domain. Use when users: (1) ask to "create an agent", "build an assistant", or "design an AI system" (2) want to understand agent architecture, agentic patterns, or autonomous AI (3) need help with capabilities, subagents, planning, or skill mechanisms (4) ask about Claude Code, Cursor, or similar agent internals (5) want to build agents for business, research, creative, or operational tasks Keywords: agent, assistant, autonomous, workflow, tool use, multi-step, orchestration --- # Agent Builder Build AI agents for any domain - customer service, research, operations, creative work, or specialized business processes. ## The Core Philosophy > **The model already knows how to be an agent. Your job is to get out of the way.** An agent is not complex engineering. It's a simple loop that invites the model to act: ``` LOOP: Model sees: context + available capabilities Model decides: act or respond If act: execute capability, add result, continue If respond: return to user ``` **That's it.** The magic isn't in the code - it's in the model. Your code just provides the opportunity. ## The Three Elements ### 1. Capabilities (What can it DO?) Atomic actions the agent can perform: search, read, create, send, query, modify. **Design principle**: Start with 3-5 capabilities. Add more only when the agent consistently fails because a capability is missing. ### 2. Knowledge (What does it KNOW?) Domain expertise injected on-demand: policies, workflows, best practices, schemas. 
**Design principle**: Make knowledge available, not mandatory. Load it when relevant, not upfront. ### 3. Context (What has happened?) The conversation history - the thread connecting actions into coherent behavior. **Design principle**: Context is precious. Isolate noisy subtasks. Truncate verbose outputs. Protect clarity. ## Agent Design Thinking Before building, understand: - **Purpose**: What should this agent accomplish? - **Domain**: What world does it operate in? (customer service, research, operations, creative...) - **Capabilities**: What 3-5 actions are essential? - **Knowledge**: What expertise does it need access to? - **Trust**: What decisions can you delegate to the model? **CRITICAL**: Trust the model. Don't over-engineer. Don't pre-specify workflows. Give it capabilities and let it reason. ## Progressive Complexity Start simple. Add complexity only when real usage reveals the need: | Level | What to add | When to add it | |-------|-------------|----------------| | Basic | 3-5 capabilities | Always start here | | Planning | Progress tracking | Multi-step tasks lose coherence | | Subagents | Isolated child agents | Exploration pollutes context | | Skills | On-demand knowledge | Domain expertise needed | **Most agents never need to go beyond the Planning level (Level 2).** ## Domain Examples **Business**: CRM queries, email, calendar, approvals **Research**: Database search, document analysis, citations **Operations**: Monitoring, tickets, notifications, escalation **Creative**: Asset generation, editing, collaboration, review The pattern is universal. Only the capabilities change. ## Key Principles 1. **The model IS the agent** - Code just runs the loop 2. **Capabilities enable** - What it CAN do 3. **Knowledge informs** - What it KNOWS how to do 4. **Constraints focus** - Limits create clarity 5. **Trust liberates** - Let the model reason 6. 
**Iteration reveals** - Start minimal, evolve from usage ## Anti-Patterns | Pattern | Problem | Solution | |---------|---------|----------| | Over-engineering | Complexity before need | Start simple | | Too many capabilities | Model confusion | 3-5 to start | | Rigid workflows | Can't adapt | Let model decide | | Front-loaded knowledge | Context bloat | Load on-demand | | Micromanagement | Undercuts intelligence | Trust the model | ## Resources **Philosophy & Theory**: - `references/agent-philosophy.md` - Deep dive into why agents work **Implementation**: - `references/minimal-agent.py` - Complete working agent (~80 lines) - `references/tool-templates.py` - Capability definitions - `references/subagent-pattern.py` - Context isolation **Scaffolding**: - `scripts/init_agent.py` - Generate new agent projects ## The Agent Mindset **From**: "How do I make the system do X?" **To**: "How do I enable the model to do X?" **From**: "What's the workflow for this task?" **To**: "What capabilities would help accomplish this?" The best agent code is almost boring. Simple loops. Clear capabilities. Clean context. The magic isn't in the code. **Give the model capabilities and knowledge. Trust it to figure out the rest.** ================================================ FILE: skills/agent-builder/references/agent-philosophy.md ================================================ # The Philosophy of Agent Harness Engineering > **The model already knows how to be an agent. Your job is to build it a world worth acting in.** ## The Fundamental Truth Strip away every framework, every library, every architectural pattern. What remains? A loop. A model. An invitation to act. The agent is not the code. The agent is the model itself -- a vast neural network trained on humanity's collective problem-solving, reasoning, and tool use. The code merely provides the opportunity for the model to express its agency. The code is the harness. The model is the agent. These are not interchangeable. 
Confuse them, and you will build the wrong thing. ## What an Agent IS An agent is a neural network -- a Transformer, an RNN, a learned function -- that has been trained, through billions of gradient updates on action-sequence data, to perceive an environment, reason about goals, and take actions to achieve them. A human is an agent: a biological neural network shaped by evolution. DeepMind's DQN is an agent: a convolutional network that learned to play Atari from raw pixels. OpenAI Five is an agent: five networks that learned Dota 2 teamwork through self-play. Claude is an agent: a language model that learned to reason and act from the breadth of human knowledge. In every case, the agent is the trained model. Not the game engine. Not the Dota 2 client. Not the terminal. The model. ## What an Agent Is NOT Prompt plumbing is not agency. Wiring together LLM API calls with if-else branches, node graphs, and hardcoded routing logic does not produce an agent. It produces a brittle pipeline -- a Rube Goldberg machine with an LLM wedged in as a text-completion node. You cannot engineer your way to agency. Agency is learned, not programmed. No amount of glue code will emergently produce autonomous behavior. Those systems are the modern resurrection of GOFAI -- symbolic rule systems the field abandoned decades ago, now spray-painted with an LLM veneer. ## The Harness: What We Actually Build If the model is the agent, then what is the code? It is the **harness** -- the environment that gives the agent the ability to perceive and act in a specific domain. ``` Harness = Tools + Knowledge + Observation + Action Interfaces + Permissions ``` ### Tools: The Agent's Hands Tools answer: **What can the agent DO?** Each tool is an atomic action the agent can take in its environment. File read/write, shell execution, API calls, browser control, database queries. The model needs to understand what each tool does, but not how to sequence them -- it will figure that out. 
**Design principle**: Atomic, composable, well-described. Start with 3-5. Add more only when the model consistently fails to accomplish tasks because a tool is missing. ### Knowledge: The Agent's Expertise Knowledge answers: **What does the agent KNOW?** Domain expertise that turns a general agent into a domain specialist. Product documentation, architectural decisions, regulatory requirements, style guides. Inject on-demand (via tool_result), not upfront (via system prompt). Progressive disclosure preserves context for what matters. **Design principle**: Available but not mandatory. The agent should know what knowledge exists and pull what it needs. ### Context: The Agent's Memory Context is the thread connecting individual actions into coherent behavior. What has been said, tried, learned, and decided. **Design principle**: Context is precious. Protect it. Isolate subtasks that generate noise (s04). Compress when history grows long (s06). Persist goals beyond single conversations (s07). ### Permissions: The Agent's Boundaries Permissions answer: **What is the agent ALLOWED to do?** Sandbox file access. Require approval for destructive operations. Enforce trust boundaries between the agent and external systems. This is where safety engineering meets harness engineering. **Design principle**: Constraints focus behavior, not limit it. "One task in_progress at a time" forces sequential focus. "Read-only subagent" prevents accidental modifications. ### Task-Process Data: The Agent's Training Signal Every action sequence the agent executes in your harness is training signal. The perception-reasoning-action traces from real deployments are the raw material for fine-tuning the next generation of agent models. Your harness doesn't just serve the agent -- it can help evolve the agent. 
## The Universal Loop Every effective agent -- regardless of domain -- follows the same pattern: ``` LOOP: Model sees: conversation history + available tools Model decides: act or respond If act: tool executed, result added to context, loop continues If respond: answer returned, loop ends ``` This is not a simplification. This is the actual architecture. Everything else is harness engineering -- mechanisms layered on top of this loop to make the agent more effective. The loop belongs to the agent. The mechanisms belong to the harness. ## Principles of Harness Engineering ### Trust the Model The most important principle: **trust the model**. Don't anticipate every edge case. Don't build elaborate decision trees. Don't pre-specify the workflow. The model is better at reasoning than any rule system you could write. Your conditional logic will fail on edge cases. The model will reason through them. **Give the model tools and knowledge. Let it figure out how to use them.** ### Constraints Enable This seems paradoxical, but constraints don't limit agents -- they focus them. A todo list with "only one task in progress" forces sequential focus. A subagent with read-only access prevents accidental modifications. A context compression threshold keeps history from overwhelming. The best constraints prevent the model from getting lost, not micromanage its approach. ### Progressive Complexity Never build everything upfront. ``` Level 0: Model + one tool (bash) -- s01 Level 1: Model + tool dispatch map -- s02 Level 2: Model + planning -- s03 Level 3: Model + subagents + skills -- s04, s05 Level 4: Model + context management + persistence -- s06, s07, s08 Level 5: Model + teams + autonomy + isolation -- s09-s12 ``` Start at the lowest level that might work. Move up only when real usage reveals the need. ## The Mind Shift Building harnesses requires a fundamental shift in thinking: **From**: "How do I make the system do X?" **To**: "How do I enable the model to do X?" 
**From**: "What should happen when the user says Y?" **To**: "What tools would help address Y?" **From**: "What's the workflow for this task?" **To**: "What does the model need to figure out the workflow?" **From**: "I'm building an agent." **To**: "I'm building a harness for the agent." The best harness code is almost boring. Simple loops. Clear tool definitions. Clean context management. The magic isn't in the code -- it's in the model. ## The Vehicle Metaphor The model is the driver. The harness is the vehicle. A coding agent's vehicle is its IDE, terminal, and filesystem. A farm agent's vehicle is its sensor array, irrigation controls, and weather data. A hotel agent's vehicle is its booking system, guest channels, and facility APIs. The driver generalizes. The vehicle specializes. Your job as a harness engineer is to build the best vehicle for your domain -- one that gives the driver maximum visibility, precise controls, and clear boundaries. Build the cockpit. Build the dashboard. Build the controls. The pilot is already trained. ## Conclusion The model is the agent. The code is the harness. Know which one you're building. You are not writing intelligence. You are building the world intelligence inhabits. The quality of that world -- how clearly the agent can perceive, how precisely it can act, how rich its knowledge -- directly determines how effectively the intelligence can express itself. Build great harnesses. The agent will do the rest. ================================================ FILE: skills/agent-builder/references/minimal-agent.py ================================================ #!/usr/bin/env python3 """ Minimal Agent Template - Copy and customize this. This is the simplest possible working agent (~80 lines). It has everything you need: 3 tools + loop. Usage: 1. Set ANTHROPIC_API_KEY environment variable 2. python minimal-agent.py 3. 
def execute_tool(name: str, args: dict) -> str:
    """
    Execute one tool call and return its result as text for the model.

    Args:
        name: Tool name requested by the model
              ("bash", "read_file" or "write_file").
        args: Tool input dict produced by the model.

    Returns:
        The tool output, or a string starting with "Error:" /
        "Unknown tool:". Failures are returned instead of raised so that a
        single bad tool call is reported back to the model rather than
        crashing the agent loop.

    Note: file paths are joined onto WORKDIR but are not confined to it.
    """
    try:
        if name == "bash":
            # Read the argument first so a missing key is reported as such.
            command = args["command"]
            r = subprocess.run(
                command,
                shell=True,
                cwd=WORKDIR,
                capture_output=True,
                text=True,
                timeout=60,
            )
            return (r.stdout + r.stderr).strip() or "(empty)"

        if name == "read_file":
            path = args["path"]
            # Cap the result so one huge file cannot flood the context.
            return (WORKDIR / path).read_text()[:50000]

        if name == "write_file":
            path, content = args["path"], args["content"]
            p = WORKDIR / path
            p.parent.mkdir(parents=True, exist_ok=True)
            p.write_text(content)
            return f"Wrote {len(content)} bytes to {path}"
    except subprocess.TimeoutExpired:
        return "Error: Timeout"
    except KeyError as e:
        # The model omitted a required input field.
        return f"Error: Missing required argument {e}"
    except Exception as e:
        # Covers OSError from the shell/filesystem, bad argument types, etc.
        return f"Error: {e}"

    return f"Unknown tool: {name}"
"""
Subagent Pattern - How to implement Task tool for context isolation.

The key insight: spawn child agents with ISOLATED context to prevent
"context pollution" where exploration details fill up the main conversation.
"""

import sys
import time

# Assuming client, MODEL, execute_tool are defined elsewhere

# Name of the spawning tool itself. Subagents must never receive it,
# otherwise a subagent could spawn subagents without bound.
TASK_TOOL_NAME = "Task"


# =============================================================================
# AGENT TYPE REGISTRY
# =============================================================================

AGENT_TYPES = {
    # Explore: Read-only, for searching and analyzing
    "explore": {
        "description": "Read-only agent for exploring code, finding files, searching",
        "tools": ["bash", "read_file"],  # No write access!
        "prompt": "You are an exploration agent. Search and analyze, but NEVER modify files. Return a concise summary of what you found.",
    },
    # Code: Full-powered, for implementation
    "code": {
        "description": "Full agent for implementing features and fixing bugs",
        "tools": "*",  # All tools
        "prompt": "You are a coding agent. Implement the requested changes efficiently. Return a summary of what you changed.",
    },
    # Plan: Read-only, for design work
    "plan": {
        "description": "Planning agent for designing implementation strategies",
        "tools": ["bash", "read_file"],  # Read-only
        "prompt": "You are a planning agent. Analyze the codebase and output a numbered implementation plan. Do NOT make any changes.",
    },
    # Add your own types here...
    # "test": {
    #     "description": "Testing agent for running and analyzing tests",
    #     "tools": ["bash", "read_file"],
    #     "prompt": "Run tests and report results. Don't modify code.",
    # },
}


def get_agent_descriptions() -> str:
    """Return one "- name: description" line per registered agent type."""
    return "\n".join(
        f"- {name}: {cfg['description']}"
        for name, cfg in AGENT_TYPES.items()
    )


def get_tools_for_agent(agent_type: str, base_tools: list) -> list:
    """
    Select the tool definitions a subagent of the given type may use.

    Args:
        agent_type: Key into AGENT_TYPES.
        base_tools: Tool definition dicts (each with a "name" key).

    Returns:
        A new list; base_tools itself is never returned or modified.
        - "*" in the type's config means every base tool.
        - Otherwise only the whitelisted tool names are kept.
        - The Task tool is always removed, even if the caller left it in
          base_tools, to prevent infinite recursion.
        - An unregistered agent_type gets no tools at all (fail closed)
          instead of silently receiving everything.
    """
    config = AGENT_TYPES.get(agent_type)
    if config is None:
        return []

    candidates = [t for t in base_tools if t["name"] != TASK_TOOL_NAME]
    allowed = config.get("tools", "*")
    if allowed == "*":
        return candidates
    return [t for t in candidates if t["name"] in allowed]


# =============================================================================
# TASK TOOL DEFINITION
# =============================================================================

TASK_TOOL = {
    "name": TASK_TOOL_NAME,
    "description": f"""Spawn a subagent for a focused subtask.

Subagents run in ISOLATED context - they don't see parent's history.
Use this to keep the main conversation clean.

Agent types:
{get_agent_descriptions()}

Example uses:
- Task(explore): "Find all files using the auth module"
- Task(plan): "Design a migration strategy for the database"
- Task(code): "Implement the user registration form"
""",
    "input_schema": {
        "type": "object",
        "properties": {
            "description": {
                "type": "string",
                "description": "Short task name (3-5 words) for progress display"
            },
            "prompt": {
                "type": "string",
                "description": "Detailed instructions for the subagent"
            },
            "agent_type": {
                "type": "string",
                "enum": list(AGENT_TYPES.keys()),
                "description": "Type of agent to spawn"
            },
        },
        "required": ["description", "prompt", "agent_type"],
    },
}


# =============================================================================
# SUBAGENT EXECUTION
# =============================================================================

def run_task(description: str, prompt: str, agent_type: str,
             client, model: str, workdir, base_tools: list, execute_tool,
             max_turns: int = None) -> str:
    """
    Execute a subagent task with isolated context.

    Key concepts:
    1. ISOLATED HISTORY - subagent starts fresh, no parent context
    2. FILTERED TOOLS - based on agent type permissions
    3. AGENT-SPECIFIC PROMPT - specialized behavior
    4. RETURNS SUMMARY ONLY - parent sees just the final result

    Args:
        description: Short name for progress display
        prompt: Detailed instructions for subagent
        agent_type: Key from AGENT_TYPES
        client: Object exposing messages.create(...) (Anthropic client)
        model: Model to use
        workdir: Working directory (only interpolated into the system prompt)
        base_tools: List of tool definitions
        execute_tool: Callable (name, input) -> str that runs a tool
        max_turns: Optional cap on model calls. None (default) keeps the
            loop unbounded; at least one call is always made.

    Returns:
        All text blocks of the subagent's last response, concatenated.
        "(subagent returned no text)" if there were none, or an
        "Error: ..." string for an unknown agent type.
    """
    if agent_type not in AGENT_TYPES:
        return f"Error: Unknown agent type '{agent_type}'"

    config = AGENT_TYPES[agent_type]
    role_prompt = config["prompt"]

    # Agent-specific system prompt
    sub_system = f"""You are a {agent_type} subagent at {workdir}.

{role_prompt}

Complete the task and return a clear, concise summary."""

    # Filtered tools for this agent type
    sub_tools = get_tools_for_agent(agent_type, base_tools)

    # KEY: ISOLATED message history!
    # The subagent starts fresh, doesn't see parent's conversation
    sub_messages = [{"role": "user", "content": prompt}]

    # Progress display
    print(f"  [{agent_type}] {description}")
    start = time.time()
    tool_count = 0
    turns = 0
    hit_limit = False

    # Run the same agent loop (but silently)
    while True:
        response = client.messages.create(
            model=model,
            system=sub_system,
            messages=sub_messages,
            tools=sub_tools,
            max_tokens=8000,
        )
        turns += 1

        # Check if done
        if response.stop_reason != "tool_use":
            break

        # Safety valve: stop before running yet another round of tools.
        if max_turns is not None and turns >= max_turns:
            hit_limit = True
            break

        # Execute tools
        results = []
        for tc in response.content:
            if tc.type != "tool_use":
                continue
            tool_count += 1
            try:
                output = execute_tool(tc.name, tc.input)
            except Exception as e:
                # Report the failure to the subagent instead of aborting the
                # whole task (and the parent's tool call) on one bad tool run.
                output = f"Error: {e}"
            results.append({
                "type": "tool_result",
                "tool_use_id": tc.id,
                "content": output
            })

            # Update progress (in-place on same line)
            elapsed = time.time() - start
            sys.stdout.write(
                f"\r  [{agent_type}] {description} ... {tool_count} tools, {elapsed:.1f}s"
            )
            sys.stdout.flush()

        sub_messages.append({"role": "assistant", "content": response.content})
        sub_messages.append({"role": "user", "content": results})

    # Final progress update
    elapsed = time.time() - start
    sys.stdout.write(
        f"\r  [{agent_type}] {description} - done ({tool_count} tools, {elapsed:.1f}s)\n"
    )

    # Extract and return ONLY the final text.
    # This is what the parent agent sees - a clean summary. Every text block
    # is kept so a multi-block answer is not truncated to its first part.
    summary = "".join(
        block.text for block in response.content if hasattr(block, "text")
    )

    if hit_limit:
        note = f"(subagent stopped after {max_turns} turns without finishing)"
        return f"{summary}\n{note}" if summary else note

    return summary or "(subagent returned no text)"
"""
Tool Templates - Copy and customize these for your agent.

Each tool needs:
1. Definition (JSON schema for the model)
2. Implementation (Python function)
"""

from pathlib import Path
import subprocess

WORKDIR = Path.cwd()


# =============================================================================
# TOOL DEFINITIONS (for TOOLS list)
# =============================================================================

BASH_TOOL = {
    "name": "bash",
    "description": "Run a shell command. Use for: ls, find, grep, git, npm, python, etc.",
    "input_schema": {
        "type": "object",
        "properties": {
            "command": {
                "type": "string",
                "description": "The shell command to execute"
            }
        },
        "required": ["command"],
    },
}

READ_FILE_TOOL = {
    "name": "read_file",
    "description": "Read file contents. Returns UTF-8 text.",
    "input_schema": {
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Relative path to the file"
            },
            "limit": {
                "type": "integer",
                "description": "Max lines to read (default: all)"
            },
        },
        "required": ["path"],
    },
}

WRITE_FILE_TOOL = {
    "name": "write_file",
    "description": "Write content to a file. Creates parent directories if needed.",
    "input_schema": {
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Relative path for the file"
            },
            "content": {
                "type": "string",
                "description": "Content to write"
            },
        },
        "required": ["path", "content"],
    },
}

EDIT_FILE_TOOL = {
    "name": "edit_file",
    "description": "Replace exact text in a file. Use for surgical edits.",
    "input_schema": {
        "type": "object",
        "properties": {
            "path": {
                "type": "string",
                "description": "Relative path to the file"
            },
            "old_text": {
                "type": "string",
                "description": "Exact text to find (must match precisely)"
            },
            "new_text": {
                "type": "string",
                "description": "Replacement text"
            },
        },
        "required": ["path", "old_text", "new_text"],
    },
}

TODO_WRITE_TOOL = {
    "name": "TodoWrite",
    "description": "Update the task list. Use to plan and track progress.",
    "input_schema": {
        "type": "object",
        "properties": {
            "items": {
                "type": "array",
                "description": "Complete list of tasks",
                "items": {
                    "type": "object",
                    "properties": {
                        "content": {"type": "string", "description": "Task description"},
                        "status": {"type": "string", "enum": ["pending", "in_progress", "completed"]},
                        "activeForm": {"type": "string", "description": "Present tense, e.g. 'Reading files'"},
                    },
                    "required": ["content", "status", "activeForm"],
                },
            }
        },
        "required": ["items"],
    },
}

TASK_TOOL_TEMPLATE = """
# Generate dynamically with agent types
TASK_TOOL = {
    "name": "Task",
    "description": f"Spawn a subagent for a focused subtask.\\n\\nAgent types:\\n{get_agent_descriptions()}",
    "input_schema": {
        "type": "object",
        "properties": {
            "description": {"type": "string", "description": "Short task name (3-5 words)"},
            "prompt": {"type": "string", "description": "Detailed instructions"},
            "agent_type": {"type": "string", "enum": list(AGENT_TYPES.keys())},
        },
        "required": ["description", "prompt", "agent_type"],
    },
}
"""


# =============================================================================
# TOOL IMPLEMENTATIONS
# =============================================================================

def safe_path(p: str) -> Path:
    """
    Security: Ensure path stays within workspace.

    Prevents ../../../etc/passwd attacks.

    Args:
        p: Path supplied by the model, interpreted relative to WORKDIR.

    Returns:
        The fully resolved path inside the workspace.

    Raises:
        ValueError: If the resolved path is outside WORKDIR.
    """
    # Resolve the root as well: comparing a resolved candidate against an
    # unresolved root would wrongly reject every path when WORKDIR contains
    # a symlink.
    root = WORKDIR.resolve()
    path = (root / p).resolve()
    if not path.is_relative_to(root):
        raise ValueError(f"Path escapes workspace: {p}")
    return path


def run_bash(command: str) -> str:
    """
    Execute shell command with safety checks.

    Safety features:
    - Blocks obviously dangerous commands (plain substring match, so this
      is a guard rail against accidents, not a sandbox)
    - 60 second timeout
    - Output truncated to 50,000 characters

    Returns:
        Combined stdout + stderr, "(no output)" if both are empty, or an
        "Error: ..." string.
    """
    dangerous = ["rm -rf /", "sudo", "shutdown", "reboot", "> /dev/"]
    if any(d in command for d in dangerous):
        return "Error: Dangerous command blocked"

    try:
        result = subprocess.run(
            command,
            shell=True,
            cwd=WORKDIR,
            capture_output=True,
            text=True,
            timeout=60
        )
        output = (result.stdout + result.stderr).strip()
        return output[:50000] if output else "(no output)"
    except subprocess.TimeoutExpired:
        return "Error: Command timed out (60s)"
    except Exception as e:
        return f"Error: {e}"


def run_read_file(path: str, limit: int = None) -> str:
    """
    Read file contents with optional line limit.

    Features:
    - Safe path resolution
    - Decodes as UTF-8, as the tool description promises, instead of the
      platform's locale encoding
    - Optional line limit for large files; None, 0 or a negative value
      means "no limit"
    - Output truncated to 50,000 characters

    Returns:
        The file's lines joined with "\\n" (plus a "... (N more lines)"
        marker when the limit cut the file short), or "Error: ...".
    """
    try:
        lines = safe_path(path).read_text(encoding="utf-8").splitlines()

        # The 0 < limit guard keeps a negative limit from slicing off the
        # END of the file and reporting a bogus remaining-line count.
        if limit is not None and 0 < limit < len(lines):
            hidden = len(lines) - limit
            lines = lines[:limit]
            lines.append(f"... ({hidden} more lines)")

        return "\n".join(lines)[:50000]
    except Exception as e:
        return f"Error: {e}"


def run_write_file(path: str, content: str) -> str:
    """
    Write content to file, creating parent directories if needed.

    Features:
    - Safe path resolution
    - Auto-creates parent directories
    - Always writes UTF-8
    - Returns the size of the UTF-8 encoded content for confirmation
      (a real byte count, not a character count)
    """
    try:
        fp = safe_path(path)
        fp.parent.mkdir(parents=True, exist_ok=True)
        fp.write_text(content, encoding="utf-8")
        size = len(content.encode("utf-8"))
        return f"Wrote {size} bytes to {path}"
    except Exception as e:
        return f"Error: {e}"


def run_edit_file(path: str, old_text: str, new_text: str) -> str:
    """
    Replace exact text in a file (surgical edit).

    Features:
    - Exact string matching (not regex)
    - Only replaces first occurrence (safety)
    - Clear error if text not found
    - Rejects an empty old_text, which would otherwise "match" everywhere
      and silently prepend new_text to the file
    """
    try:
        if not old_text:
            return "Error: old_text must not be empty"

        fp = safe_path(path)
        content = fp.read_text(encoding="utf-8")

        if old_text not in content:
            return f"Error: Text not found in {path}"

        new_content = content.replace(old_text, new_text, 1)
        fp.write_text(new_content, encoding="utf-8")
        return f"Edited {path}"
    except Exception as e:
        return f"Error: {e}"


# =============================================================================
# DISPATCHER PATTERN
# =============================================================================

def execute_tool(name: str, args: dict) -> str:
    """
    Dispatch tool call to implementation.

    This pattern makes it easy to add new tools:
    1. Add definition to TOOLS list
    2. Add implementation function
    3. Add case to this dispatcher

    Returns:
        The tool's text result. A missing required argument or an unknown
        tool name yields an explanatory string instead of an exception, so
        the model can see the mistake and retry.
    """
    try:
        if name == "bash":
            return run_bash(args["command"])
        if name == "read_file":
            return run_read_file(args["path"], args.get("limit"))
        if name == "write_file":
            return run_write_file(args["path"], args["content"])
        if name == "edit_file":
            return run_edit_file(args["path"], args["old_text"], args["new_text"])
        # Add more tools here...
    except KeyError as e:
        return f"Error: Missing required argument {e} for tool '{name}'"

    return f"Unknown tool: {name}"
Subagents via self-recursion: python {name}.py "subtask" """ from anthropic import Anthropic from dotenv import load_dotenv import subprocess import os load_dotenv() client = Anthropic( api_key=os.getenv("ANTHROPIC_API_KEY"), base_url=os.getenv("ANTHROPIC_BASE_URL") ) MODEL = os.getenv("MODEL_NAME", "claude-sonnet-4-20250514") SYSTEM = """You are a coding agent. Use bash for everything: - Read: cat, grep, find, ls - Write: echo 'content' > file - Subagent: python {name}.py "subtask" """ TOOL = [{{ "name": "bash", "description": "Execute shell command", "input_schema": {{"type": "object", "properties": {{"command": {{"type": "string"}}}}, "required": ["command"]}} }}] def run(prompt, history=[]): history.append({{"role": "user", "content": prompt}}) while True: r = client.messages.create(model=MODEL, system=SYSTEM, messages=history, tools=TOOL, max_tokens=8000) history.append({{"role": "assistant", "content": r.content}}) if r.stop_reason != "tool_use": return "".join(b.text for b in r.content if hasattr(b, "text")) results = [] for b in r.content: if b.type == "tool_use": print(f"> {{b.input['command']}}") try: out = subprocess.run(b.input["command"], shell=True, capture_output=True, text=True, timeout=60) output = (out.stdout + out.stderr).strip() or "(empty)" except Exception as e: output = f"Error: {{e}}" results.append({{"type": "tool_result", "tool_use_id": b.id, "content": output[:50000]}}) history.append({{"role": "user", "content": results}}) if __name__ == "__main__": h = [] print("{name} - Level 0 Agent\\nType 'q' to quit.\\n") while (q := input(">> ").strip()) not in ("q", "quit", ""): print(run(q, h), "\\n") ''', 1: '''#!/usr/bin/env python3 """ Level 1 Agent - Model as Agent (~200 lines) Core insight: 4 tools cover 90% of coding tasks. The model IS the agent. Code just runs the loop. 
""" from anthropic import Anthropic from dotenv import load_dotenv from pathlib import Path import subprocess import os load_dotenv() client = Anthropic( api_key=os.getenv("ANTHROPIC_API_KEY"), base_url=os.getenv("ANTHROPIC_BASE_URL") ) MODEL = os.getenv("MODEL_NAME", "claude-sonnet-4-20250514") WORKDIR = Path.cwd() SYSTEM = f"""You are a coding agent at {{WORKDIR}}. Rules: - Prefer tools over prose. Act, don't just explain. - Never invent file paths. Use ls/find first if unsure. - Make minimal changes. Don't over-engineer. - After finishing, summarize what changed.""" TOOLS = [ {{"name": "bash", "description": "Run shell command", "input_schema": {{"type": "object", "properties": {{"command": {{"type": "string"}}}}, "required": ["command"]}}}}, {{"name": "read_file", "description": "Read file contents", "input_schema": {{"type": "object", "properties": {{"path": {{"type": "string"}}}}, "required": ["path"]}}}}, {{"name": "write_file", "description": "Write content to file", "input_schema": {{"type": "object", "properties": {{"path": {{"type": "string"}}, "content": {{"type": "string"}}}}, "required": ["path", "content"]}}}}, {{"name": "edit_file", "description": "Replace exact text in file", "input_schema": {{"type": "object", "properties": {{"path": {{"type": "string"}}, "old_text": {{"type": "string"}}, "new_text": {{"type": "string"}}}}, "required": ["path", "old_text", "new_text"]}}}}, ] def safe_path(p: str) -> Path: """Prevent path escape attacks.""" path = (WORKDIR / p).resolve() if not path.is_relative_to(WORKDIR): raise ValueError(f"Path escapes workspace: {{p}}") return path def execute(name: str, args: dict) -> str: """Execute a tool and return result.""" if name == "bash": dangerous = ["rm -rf /", "sudo", "shutdown", "> /dev/"] if any(d in args["command"] for d in dangerous): return "Error: Dangerous command blocked" try: r = subprocess.run(args["command"], shell=True, cwd=WORKDIR, capture_output=True, text=True, timeout=60) return (r.stdout + 
r.stderr).strip()[:50000] or "(empty)" except subprocess.TimeoutExpired: return "Error: Timeout (60s)" except Exception as e: return f"Error: {{e}}" if name == "read_file": try: return safe_path(args["path"]).read_text()[:50000] except Exception as e: return f"Error: {{e}}" if name == "write_file": try: p = safe_path(args["path"]) p.parent.mkdir(parents=True, exist_ok=True) p.write_text(args["content"]) return f"Wrote {{len(args['content'])}} bytes to {{args['path']}}" except Exception as e: return f"Error: {{e}}" if name == "edit_file": try: p = safe_path(args["path"]) content = p.read_text() if args["old_text"] not in content: return f"Error: Text not found in {{args['path']}}" p.write_text(content.replace(args["old_text"], args["new_text"], 1)) return f"Edited {{args['path']}}" except Exception as e: return f"Error: {{e}}" return f"Unknown tool: {{name}}" def agent(prompt: str, history: list = None) -> str: """Run the agent loop.""" if history is None: history = [] history.append({{"role": "user", "content": prompt}}) while True: response = client.messages.create( model=MODEL, system=SYSTEM, messages=history, tools=TOOLS, max_tokens=8000 ) history.append({{"role": "assistant", "content": response.content}}) if response.stop_reason != "tool_use": return "".join(b.text for b in response.content if hasattr(b, "text")) results = [] for block in response.content: if block.type == "tool_use": print(f"> {{block.name}}: {{str(block.input)[:100]}}") output = execute(block.name, block.input) print(f" {{output[:100]}}...") results.append({{"type": "tool_result", "tool_use_id": block.id, "content": output}}) history.append({{"role": "user", "content": results}}) if __name__ == "__main__": print(f"{name} - Level 1 Agent at {{WORKDIR}}") print("Type 'q' to quit.\\n") h = [] while True: try: query = input(">> ").strip() except (EOFError, KeyboardInterrupt): break if query in ("q", "quit", "exit", ""): break print(agent(query, h), "\\n") ''', } ENV_TEMPLATE = '''# API 
def create_agent(name: str, level: int, output_dir: Path):
    """
    Scaffold a new agent project in ``output_dir / name``.

    Creates the project directory (parents included) and writes three files:
    ``<name>.py`` rendered from the level template, ``.env.example`` and
    ``.gitignore``. Progress and next-step instructions are printed.

    Args:
        name: Agent name. Used for the directory, the script file name and
            substituted for ``{name}`` inside the template.
        level: Requested complexity level. Levels that have an entry in
            TEMPLATES are rendered directly. Levels 2-4 have no template in
            this script and fall back to the Level 1 template; a notice is
            printed so the substitution is not silent.
        output_dir: Parent directory for the new project.

    Exits the process with status 1 when ``level`` has no template and is
    not one of the known fallback levels (2-4).
    """
    # Validate level
    if level not in TEMPLATES and level not in (2, 3, 4):
        print(f"Error: Level {level} not yet implemented in scaffold.")
        print("Available levels: 0 (minimal), 1 (4 tools)")
        print("For levels 2-4, copy from mini-claude-code repository.")
        sys.exit(1)

    # Levels 2-4 are accepted but not templated: tell the user what they get
    # instead of quietly handing them a different level.
    if level not in TEMPLATES:
        print(
            f"Note: Level {level} has no scaffold template yet; "
            "generating the Level 1 agent instead."
        )
    template = TEMPLATES.get(level, TEMPLATES[1])

    # Create output directory
    agent_dir = output_dir / name
    agent_dir.mkdir(parents=True, exist_ok=True)

    # Write agent file. Python source is UTF-8 by default, so write it as
    # UTF-8 explicitly rather than with the platform's locale encoding
    # (matters when `name` contains non-ASCII characters).
    agent_file = agent_dir / f"{name}.py"
    agent_file.write_text(template.format(name=name), encoding="utf-8")
    print(f"Created: {agent_file}")

    # Write .env.example
    env_file = agent_dir / ".env.example"
    env_file.write_text(ENV_TEMPLATE, encoding="utf-8")
    print(f"Created: {env_file}")

    # Write .gitignore
    gitignore = agent_dir / ".gitignore"
    gitignore.write_text(".env\n__pycache__/\n*.pyc\n", encoding="utf-8")
    print(f"Created: {gitignore}")

    print(f"\nAgent '{name}' created at {agent_dir}")
    print("\nNext steps:")
    print(f" 1. cd {agent_dir}")
    print(" 2. cp .env.example .env")
    print(" 3. Edit .env with your API key")
    print(" 4. pip install anthropic python-dotenv")
    print(f" 5. python {name}.py")


def main():
    """Parse command-line arguments and scaffold the requested agent."""
    parser = argparse.ArgumentParser(
        description="Scaffold a new AI coding agent project",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Levels:
  0  Minimal (~50 lines)  - Single bash tool, self-recursion for subagents
  1  Basic (~200 lines)   - 4 core tools: bash, read, write, edit
  2  Todo (~300 lines)    - + TodoWrite for structured planning
  3  Subagent (~450)      - + Task tool for context isolation
  4  Skills (~550)        - + Skill tool for domain expertise

Levels 2-4 are not templated yet and currently generate the Level 1 agent.
"""
    )
    parser.add_argument("name", help="Name of the agent to create")
    parser.add_argument("--level", type=int, default=1, choices=[0, 1, 2, 3, 4],
                        help="Complexity level (default: 1)")
    parser.add_argument("--path", type=Path, default=Path.cwd(),
                        help="Output directory (default: current directory)")

    args = parser.parse_args()
    create_agent(args.name, args.level, args.path)
Correctness Check for: - [ ] **Logic errors**: Off-by-one, null handling, edge cases - [ ] **Race conditions**: Concurrent access without synchronization - [ ] **Resource leaks**: Unclosed files, connections, memory - [ ] **Error handling**: Swallowed exceptions, missing error paths - [ ] **Type safety**: Implicit conversions, any types ### 3. Performance Check for: - [ ] **N+1 queries**: Database calls in loops - [ ] **Memory issues**: Large allocations, retained references - [ ] **Blocking operations**: Sync I/O in async code - [ ] **Inefficient algorithms**: O(n^2) when O(n) possible - [ ] **Missing caching**: Repeated expensive computations ### 4. Maintainability Check for: - [ ] **Naming**: Clear, consistent, descriptive - [ ] **Complexity**: Functions > 50 lines, deep nesting > 3 levels - [ ] **Duplication**: Copy-pasted code blocks - [ ] **Dead code**: Unused imports, unreachable branches - [ ] **Comments**: Outdated, redundant, or missing where needed ### 5. Testing Check for: - [ ] **Coverage**: Critical paths tested - [ ] **Edge cases**: Null, empty, boundary values - [ ] **Mocking**: External dependencies isolated - [ ] **Assertions**: Meaningful, specific checks ## Review Output Format ```markdown ## Code Review: [file/component name] ### Summary [1-2 sentence overview] ### Critical Issues 1. **[Issue]** (line X): [Description] - Impact: [What could go wrong] - Fix: [Suggested solution] ### Improvements 1. 
**[Suggestion]** (line X): [Description] ### Positive Notes - [What was done well] ### Verdict [ ] Ready to merge [ ] Needs minor changes [ ] Needs major revision ``` ## Common Patterns to Flag ### Python ```python # Bad: SQL injection cursor.execute(f"SELECT * FROM users WHERE id = {user_id}") # Good: cursor.execute("SELECT * FROM users WHERE id = ?", (user_id,)) # Bad: Command injection os.system(f"ls {user_input}") # Good: subprocess.run(["ls", user_input], check=True) # Bad: Mutable default argument def append(item, lst=[]): # Bug: shared mutable default # Good: def append(item, lst=None): if lst is None: lst = [] ``` ### JavaScript/TypeScript ```javascript // Bad: Prototype pollution Object.assign(target, userInput) // Good: Object.assign(target, sanitize(userInput)) // Bad: eval usage eval(userCode) // Good: Never use eval with user input // Bad: Callback hell getData(x => process(x, y => save(y, z => done(z)))) // Good: const data = await getData(); const processed = await process(data); await save(processed); ``` ## Review Commands ```bash # Show recent changes git diff HEAD~5 --stat git log --oneline -10 # Find potential issues grep -rn "TODO\|FIXME\|HACK\|XXX" . grep -rn "password\|secret\|token" . --include="*.py" # Check complexity (Python) pip install radon && radon cc . -a # Check dependencies npm outdated # Node pip list --outdated # Python ``` ## Review Workflow 1. **Understand context**: Read PR description, linked issues 2. **Run the code**: Build, test, run locally if possible 3. **Read top-down**: Start with main entry points 4. **Check tests**: Are changes tested? Do tests pass? 5. **Security scan**: Run automated tools 6. **Manual review**: Use checklist above 7.
**Write feedback**: Be specific, suggest fixes, be kind ================================================ FILE: skills/mcp-builder/SKILL.md ================================================ --- name: mcp-builder description: Build MCP (Model Context Protocol) servers that give Claude new capabilities. Use when user wants to create an MCP server, add tools to Claude, or integrate external services. --- # MCP Server Building Skill You now have expertise in building MCP (Model Context Protocol) servers. MCP enables Claude to interact with external services through a standardized protocol. ## What is MCP? MCP servers expose: - **Tools**: Functions Claude can call (like API endpoints) - **Resources**: Data Claude can read (like files or database records) - **Prompts**: Pre-built prompt templates ## Quick Start: Python MCP Server ### 1. Project Setup ```bash # Create project mkdir my-mcp-server && cd my-mcp-server python3 -m venv venv && source venv/bin/activate # Install MCP SDK pip install mcp ``` ### 2. Basic Server Template ```python #!/usr/bin/env python3 """my_server.py - A simple MCP server""" from mcp.server import Server from mcp.server.stdio import stdio_server from mcp.types import Tool, TextContent # Create server instance server = Server("my-server") # Define a tool @server.tool() async def hello(name: str) -> str: """Say hello to someone. Args: name: The name to greet """ return f"Hello, {name}!" @server.tool() async def add_numbers(a: int, b: int) -> str: """Add two numbers together. Args: a: First number b: Second number """ return str(a + b) # Run server async def main(): async with stdio_server() as (read, write): await server.run(read, write) if __name__ == "__main__": import asyncio asyncio.run(main()) ``` ### 3. Register with Claude Add to `~/.claude/mcp.json`: ```json { "mcpServers": { "my-server": { "command": "python3", "args": ["/path/to/my_server.py"] } } } ``` ## TypeScript MCP Server ### 1. 
Setup ```bash mkdir my-mcp-server && cd my-mcp-server npm init -y npm install @modelcontextprotocol/sdk ``` ### 2. Template ```typescript // src/index.ts import { Server } from "@modelcontextprotocol/sdk/server/index.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; const server = new Server({ name: "my-server", version: "1.0.0", }); // Define tools server.setRequestHandler("tools/list", async () => ({ tools: [ { name: "hello", description: "Say hello to someone", inputSchema: { type: "object", properties: { name: { type: "string", description: "Name to greet" }, }, required: ["name"], }, }, ], })); server.setRequestHandler("tools/call", async (request) => { if (request.params.name === "hello") { const name = request.params.arguments.name; return { content: [{ type: "text", text: `Hello, ${name}!` }] }; } throw new Error("Unknown tool"); }); // Start server const transport = new StdioServerTransport(); server.connect(transport); ``` ## Advanced Patterns ### External API Integration ```python import httpx from mcp.server import Server server = Server("weather-server") @server.tool() async def get_weather(city: str) -> str: """Get current weather for a city.""" async with httpx.AsyncClient() as client: resp = await client.get( f"https://api.weatherapi.com/v1/current.json", params={"key": "YOUR_API_KEY", "q": city} ) data = resp.json() return f"{city}: {data['current']['temp_c']}C, {data['current']['condition']['text']}" ``` ### Database Access ```python import sqlite3 from mcp.server import Server server = Server("db-server") @server.tool() async def query_db(sql: str) -> str: """Execute a read-only SQL query.""" if not sql.strip().upper().startswith("SELECT"): return "Error: Only SELECT queries allowed" conn = sqlite3.connect("data.db") cursor = conn.execute(sql) rows = cursor.fetchall() conn.close() return str(rows) ``` ### Resources (Read-only Data) ```python @server.resource("config://settings") async def get_settings() -> 
str: """Application settings.""" return open("settings.json").read() @server.resource("file://{path}") async def read_file(path: str) -> str: """Read a file from the workspace.""" return open(path).read() ``` ## Testing ```bash # Test with MCP Inspector npx @modelcontextprotocol/inspector python3 my_server.py # Or send test messages directly echo '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' | python3 my_server.py ``` ## Best Practices 1. **Clear tool descriptions**: Claude uses these to decide when to call tools 2. **Input validation**: Always validate and sanitize inputs 3. **Error handling**: Return meaningful error messages 4. **Async by default**: Use async/await for I/O operations 5. **Security**: Never expose sensitive operations without auth 6. **Idempotency**: Tools should be safe to retry ================================================ FILE: skills/pdf/SKILL.md ================================================ --- name: pdf description: Process PDF files - extract text, create PDFs, merge documents. Use when user asks to read PDF, create PDF, or work with PDF files. --- # PDF Processing Skill You now have expertise in PDF manipulation.
Follow these workflows: ## Reading PDFs **Option 1: Quick text extraction (preferred)** ```bash # Using pdftotext (poppler-utils) pdftotext input.pdf - # Output to stdout pdftotext input.pdf output.txt # Output to file # If pdftotext not available, try: python3 -c " import fitz # PyMuPDF doc = fitz.open('input.pdf') for page in doc: print(page.get_text()) " ``` **Option 2: Page-by-page with metadata** ```python import fitz # pip install pymupdf doc = fitz.open("input.pdf") print(f"Pages: {len(doc)}") print(f"Metadata: {doc.metadata}") for i, page in enumerate(doc): text = page.get_text() print(f"--- Page {i+1} ---") print(text) ``` ## Creating PDFs **Option 1: From Markdown (recommended)** ```bash # Using pandoc pandoc input.md -o output.pdf # With custom styling pandoc input.md -o output.pdf --pdf-engine=xelatex -V geometry:margin=1in ``` **Option 2: Programmatically** ```python from reportlab.lib.pagesizes import letter from reportlab.pdfgen import canvas c = canvas.Canvas("output.pdf", pagesize=letter) c.drawString(100, 750, "Hello, PDF!") c.save() ``` **Option 3: From HTML** ```bash # Using wkhtmltopdf wkhtmltopdf input.html output.pdf # Or with Python python3 -c " import pdfkit pdfkit.from_file('input.html', 'output.pdf') " ``` ## Merging PDFs ```python import fitz result = fitz.open() for pdf_path in ["file1.pdf", "file2.pdf", "file3.pdf"]: doc = fitz.open(pdf_path) result.insert_pdf(doc) result.save("merged.pdf") ``` ## Splitting PDFs ```python import fitz doc = fitz.open("input.pdf") for i in range(len(doc)): single = fitz.open() single.insert_pdf(doc, from_page=i, to_page=i) single.save(f"page_{i+1}.pdf") ``` ## Key Libraries | Task | Library | Install | |------|---------|---------| | Read/Write/Merge | PyMuPDF | `pip install pymupdf` | | Create from scratch | ReportLab | `pip install reportlab` | | HTML to PDF | pdfkit | `pip install pdfkit` + wkhtmltopdf | | Text extraction | pdftotext | `brew install poppler` / `apt install poppler-utils` | ## Best 
Practices 1. **Always check if tools are installed** before using them 2. **Handle encoding issues** - PDFs may contain various character encodings 3. **Large PDFs**: Process page by page to avoid memory issues 4. **OCR for scanned PDFs**: Use `pytesseract` if text extraction returns empty ================================================ FILE: web/.gitignore ================================================ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. # dependencies /node_modules /.pnp .pnp.* .yarn/* !.yarn/patches !.yarn/plugins !.yarn/releases !.yarn/versions # testing /coverage # next.js /.next/ /out/ # production /build # misc .DS_Store *.pem # debug npm-debug.log* yarn-debug.log* yarn-error.log* .pnpm-debug.log* # env files (can opt-in for committing if needed) .env* # vercel .vercel # typescript *.tsbuildinfo next-env.d.ts .env*.local ================================================ FILE: web/README.md ================================================ This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app). ## Getting Started First, run the development server: ```bash npm run dev # or yarn dev # or pnpm dev # or bun dev ``` Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file. This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel. ## Learn More To learn more about Next.js, take a look at the following resources: - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. 
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome! ## Deploy on Vercel The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details. ================================================ FILE: web/next.config.ts ================================================ import type { NextConfig } from "next"; const nextConfig: NextConfig = { output: "export", images: { unoptimized: true }, trailingSlash: true, }; export default nextConfig; ================================================ FILE: web/package.json ================================================ { "name": "web", "version": "0.1.0", "private": true, "scripts": { "extract": "tsx scripts/extract-content.ts", "predev": "npm run extract", "dev": "next dev", "prebuild": "npm run extract", "build": "next build", "start": "next start" }, "dependencies": { "diff": "^8.0.3", "framer-motion": "^12.34.0", "lucide-react": "^0.564.0", "next": "16.1.6", "react": "19.2.3", "react-dom": "19.2.3", "rehype-highlight": "^7.0.2", "rehype-raw": "^7.0.0", "rehype-stringify": "^10.0.1", "remark-gfm": "^4.0.1", "remark-parse": "^11.0.0", "remark-rehype": "^11.1.2", "tsx": "^4.21.0", "unified": "^11.0.5" }, "devDependencies": { "@tailwindcss/postcss": "^4", "@types/diff": "^7.0.2", "@types/node": "^20", "@types/react": "^19", "@types/react-dom": "^19", "tailwindcss": "^4", "typescript": "^5" } } ================================================ FILE: web/postcss.config.mjs ================================================ const config = { plugins: { "@tailwindcss/postcss": {}, }, }; export default config; ================================================ FILE: 
/**
 * Map a Python agent filename to its version ID.
 *
 *   s01_agent_loop.py -> "s01"
 *   s_full.py         -> null (reference agent, skipped)
 *   __init__.py       -> null
 *
 * @param filename File name or path of a Python agent file.
 * @returns The leading `s<digits>[a-c]` prefix, or null when the name does
 *          not follow that convention.
 */
function filenameToVersionId(filename: string): string | null {
  const stem = path.basename(filename, ".py");
  // Listed explicitly for readability; neither name matches the pattern below.
  if (stem === "s_full" || stem === "__init__") return null;
  const found = /^(s\d+[a-c]?)_/.exec(stem);
  return found ? found[1] : null;
}

/**
 * Find top-level (unindented) `class` definitions in Python source lines.
 *
 * `startLine` is the 1-based line of the `class` statement. `endLine` is the
 * 1-based line just before the next unindented line that is not blank, a
 * `#` comment or an `@` decorator, or the last line when no such line exists.
 *
 * @param lines Source split into lines.
 */
function extractClasses(
  lines: string[]
): { name: string; startLine: number; endLine: number }[] {
  const classHeader = /^class\s+(\w+)/;
  // First character is neither whitespace, '#', nor '@': top-level code.
  const topLevelCode = /^[^\s#@]/;
  const found: { name: string; startLine: number; endLine: number }[] = [];

  lines.forEach((line, index) => {
    const header = classHeader.exec(line);
    if (!header) return;

    let endLine = lines.length;
    for (let next = index + 1; next < lines.length; next++) {
      if (topLevelCode.test(lines[next])) {
        endLine = next;
        break;
      }
    }
    found.push({ name: header[1], startLine: index + 1, endLine });
  });

  return found;
}

/**
 * Read a parenthesised parameter list that starts right after an opening
 * parenthesis at (row, column) and may continue over following lines.
 *
 * Nested parentheses and single-line string literals are skipped so that
 * defaults such as `b=make(1)` or `msg=")"` do not end the list early.
 * A list on one line is returned verbatim; a multi-line list is joined with
 * single spaces and loses its trailing comma.
 *
 * @returns The parameter text, or null when no matching ")" is found within
 *          a bounded number of lines.
 */
function readParameterList(
  lines: readonly string[],
  row: number,
  column: number
): string | null {
  const maxLines = 50; // safety bound for malformed input
  const lastRow = Math.min(lines.length, row + maxLines);
  const pieces: string[] = [];
  let depth = 1;
  let quote: string | null = null;

  for (let r = row; r < lastRow; r++) {
    const text = lines[r];
    const from = r === row ? column : 0;

    for (let c = from; c < text.length; c++) {
      const ch = text[c];
      if (quote !== null) {
        if (ch === "\\") c++; // skip the escaped character
        else if (ch === quote) quote = null;
        continue;
      }
      if (ch === '"' || ch === "'") {
        quote = ch;
      } else if (ch === "(") {
        depth++;
      } else if (ch === ")" && --depth === 0) {
        pieces.push(text.slice(from, c));
        if (pieces.length === 1) return pieces[0];
        return pieces
          .map((piece) => piece.trim())
          .filter((piece) => piece !== "")
          .join(" ")
          .replace(/,$/, "");
      }
    }

    quote = null; // ordinary string literals do not continue across lines
    pieces.push(text.slice(from));
  }
  return null;
}

/**
 * Extract top-level (unindented) functions from Python source lines.
 *
 * Handles `def` and `async def`, parameter lists containing nested
 * parentheses, and signatures that span several lines. `signature` is
 * `def name(params)` (or `async def name(params)`), without return
 * annotation; `startLine` is the 1-based line of the `def`.
 *
 * @param lines Source split into lines.
 */
function extractFunctions(
  lines: string[]
): { name: string; signature: string; startLine: number }[] {
  const header = /^((?:async\s+)?def)\s+(\w+)\(/;
  const functions: { name: string; signature: string; startLine: number }[] = [];

  lines.forEach((line, index) => {
    const m = header.exec(line);
    if (!m) return;

    const keyword = m[1].replace(/\s+/, " ");
    const name = m[2];
    const afterParen = m[0].length;

    let params = readParameterList(lines, index, afterParen);
    if (params === null) {
      // Unbalanced input: fall back to the first ")" on the same line.
      const close = line.indexOf(")", afterParen);
      if (close === -1) return;
      params = line.slice(afterParen, close);
    }

    functions.push({
      name,
      signature: `${keyword} ${name}(${params})`,
      startLine: index + 1,
    });
  });

  return functions;
}

/**
 * Collect tool names from Python source by looking for `"name": "tool_name"`
 * pairs in dict literals.
 *
 * @param source Full file contents.
 * @returns Distinct names in order of first appearance.
 */
function extractTools(source: string): string[] {
  const tools = new Set<string>();
  for (const m of source.matchAll(/"name"\s*:\s*"(\w+)"/g)) {
    tools.add(m[1]);
  }
  return [...tools];
}

/**
 * Count lines that are neither blank nor pure `#` comments.
 *
 * @param lines Source split into lines.
 */
function countLoc(lines: string[]): number {
  let count = 0;
  for (const line of lines) {
    const text = line.trim();
    if (text.length > 0 && text[0] !== "#") count++;
  }
  return count;
}

/**
 * Detect the locale from a path relative to the docs directory.
 *
 *   zh/s01-the-agent-loop.md -> "zh"
 *   ja/s01-the-agent-loop.md -> "ja"
 *   anything else            -> "en"
 *
 * Both "/" and "\" separators are recognised.
 */
function detectLocale(relPath: string): "en" | "zh" | "ja" {
  for (const locale of ["zh", "ja"] as const) {
    if (relPath.startsWith(`${locale}/`) || relPath.startsWith(`${locale}\\`)) {
      return locale;
    }
  }
  return "en";
}

/**
 * Extract the version from a doc filename
 * (e.g. "s01-the-agent-loop.md" -> "s01").
 *
 * @returns The version prefix, or null when the name has none.
 */
function extractDocVersion(filename: string): string | null {
  return /^(s\d+[a-c]?)-/.exec(filename)?.[1] ?? null;
}
/**
 * Entry point: scan the Python agents and the Markdown docs and write the
 * generated JSON files into OUT_DIR.
 *
 * Steps:
 *   1. Read every `s*.py` file in AGENTS_DIR and build an AgentVersion for
 *      each file whose name yields a version ID.
 *   2. Sort versions by VERSION_ORDER and compute each version's `newTools`
 *      relative to the version sorted immediately before it.
 *   3. Compute structural diffs between adjacent entries of LEARNING_PATH.
 *   4. Read the docs for the en/zh/ja locales.
 *   5. Write versions.json and docs.json, then print a summary.
 *
 * Returns early, writing nothing, when AGENTS_DIR does not exist.
 */
function main(): void {
  console.log("Extracting content from agents and docs...");
  console.log(` Repo root: ${REPO_ROOT}`);
  console.log(` Agents dir: ${AGENTS_DIR}`);
  console.log(` Docs dir: ${DOCS_DIR}`);

  // Skip extraction if source directories don't exist (e.g. Vercel build).
  // Pre-committed generated data will be used instead.
  if (!fs.existsSync(AGENTS_DIR)) {
    console.log(" Agents directory not found, skipping extraction.");
    console.log(" Using pre-committed generated data.");
    return;
  }

  // 1. Read all agent files
  const agentFiles = fs
    .readdirSync(AGENTS_DIR)
    .filter((f) => f.startsWith("s") && f.endsWith(".py"));
  console.log(` Found ${agentFiles.length} agent files`);

  const versions: AgentVersion[] = [];
  for (const filename of agentFiles) {
    const versionId = filenameToVersionId(filename);
    if (!versionId) {
      console.warn(` Skipping ${filename}: could not determine version ID`);
      continue;
    }

    const filePath = path.join(AGENTS_DIR, filename);
    const source = fs.readFileSync(filePath, "utf-8");
    const lines = source.split("\n");
    // Metadata may be missing for a version; every field has a fallback.
    const meta = VERSION_META[versionId];

    versions.push({
      id: versionId,
      filename,
      title: meta?.title ?? versionId,
      subtitle: meta?.subtitle ?? "",
      loc: countLoc(lines),
      tools: extractTools(source),
      newTools: [], // computed after all versions are loaded
      coreAddition: meta?.coreAddition ?? "",
      keyInsight: meta?.keyInsight ?? "",
      classes: extractClasses(lines),
      functions: extractFunctions(lines),
      layer: meta?.layer ?? "tools",
      source,
    });
  }

  // Sort versions according to VERSION_ORDER; IDs not listed there sort
  // with rank 99. Keying the map by plain string lets us look up any
  // version ID without casting.
  const orderMap = new Map<string, number>(
    VERSION_ORDER.map((v, i): [string, number] => [v, i])
  );
  versions.sort(
    (a, b) => (orderMap.get(a.id) ?? 99) - (orderMap.get(b.id) ?? 99)
  );

  // 2. Compute newTools for each version (tools absent from the previous one)
  for (let i = 0; i < versions.length; i++) {
    const prev = new Set<string>(i > 0 ? versions[i - 1].tools : []);
    versions[i].newTools = versions[i].tools.filter((t) => !prev.has(t));
  }

  // 3. Compute diffs between adjacent versions in LEARNING_PATH
  const diffs: VersionDiff[] = [];
  const versionMap = new Map(versions.map((v) => [v.id, v]));
  for (let i = 1; i < LEARNING_PATH.length; i++) {
    const fromId = LEARNING_PATH[i - 1];
    const toId = LEARNING_PATH[i];
    const fromVer = versionMap.get(fromId);
    const toVer = versionMap.get(toId);
    // A step is skipped when either side has no extracted agent file.
    if (!fromVer || !toVer) continue;

    const fromClassNames = new Set(fromVer.classes.map((c) => c.name));
    const fromFuncNames = new Set(fromVer.functions.map((f) => f.name));
    const fromToolNames = new Set(fromVer.tools);

    diffs.push({
      from: fromId,
      to: toId,
      newClasses: toVer.classes
        .map((c) => c.name)
        .filter((n) => !fromClassNames.has(n)),
      newFunctions: toVer.functions
        .map((f) => f.name)
        .filter((n) => !fromFuncNames.has(n)),
      newTools: toVer.tools.filter((t) => !fromToolNames.has(t)),
      locDelta: toVer.loc - fromVer.loc,
    });
  }

  // 4. Read doc files from locale subdirectories (en/, zh/, ja/)
  const docs: DocContent[] = [];
  if (fs.existsSync(DOCS_DIR)) {
    // `as const` keeps each entry typed as its literal locale.
    const localeDirs = ["en", "zh", "ja"] as const;
    let totalDocFiles = 0;

    for (const locale of localeDirs) {
      const localeDir = path.join(DOCS_DIR, locale);
      if (!fs.existsSync(localeDir)) continue;

      const docFiles = fs
        .readdirSync(localeDir)
        .filter((f) => f.endsWith(".md"));
      totalDocFiles += docFiles.length;

      for (const filename of docFiles) {
        const version = extractDocVersion(filename);
        if (!version) {
          console.warn(` Skipping doc ${locale}/${filename}: could not determine version`);
          continue;
        }

        const filePath = path.join(localeDir, filename);
        const content = fs.readFileSync(filePath, "utf-8");
        // Title is the first level-1 Markdown heading, else the file name.
        const titleMatch = content.match(/^#\s+(.+)$/m);
        const title = titleMatch ? titleMatch[1] : filename;

        docs.push({ version, locale, title, content });
      }
    }
    console.log(` Found ${totalDocFiles} doc files across ${localeDirs.length} locales`);
  } else {
    console.warn(` Docs directory not found: ${DOCS_DIR}`);
  }

  // 5. Write output
  fs.mkdirSync(OUT_DIR, { recursive: true });

  const index: VersionIndex = { versions, diffs };
  const indexPath = path.join(OUT_DIR, "versions.json");
  fs.writeFileSync(indexPath, JSON.stringify(index, null, 2));
  console.log(` Wrote ${indexPath}`);

  const docsPath = path.join(OUT_DIR, "docs.json");
  fs.writeFileSync(docsPath, JSON.stringify(docs, null, 2));
  console.log(` Wrote ${docsPath}`);

  // Summary
  console.log("\nExtraction complete:");
  console.log(` ${versions.length} versions`);
  console.log(` ${diffs.length} diffs`);
  console.log(` ${docs.length} docs`);
  for (const v of versions) {
    console.log(
      ` ${v.id}: ${v.loc} LOC, ${v.tools.length} tools, ${v.classes.length} classes, ${v.functions.length} functions`
    );
  }
}
t("tab_simulate") }, { id: "code", label: t("tab_code") }, { id: "deep-dive", label: t("tab_deep_dive") }, ]; return (
{/* Hero Visualization */} {/* Tabbed content */} {(activeTab) => ( <> {activeTab === "learn" && } {activeTab === "simulate" && ( )} {activeTab === "code" && ( )} {activeTab === "deep-dive" && (

{t("execution_flow")}

{t("architecture")}

{diff && }
)} )}
); } ================================================ FILE: web/src/app/[locale]/(learn)/[version]/diff/diff-content.tsx ================================================ "use client"; import { useMemo } from "react"; import Link from "next/link"; import { useLocale } from "@/lib/i18n"; import { VERSION_META } from "@/lib/constants"; import { Card, CardHeader, CardTitle } from "@/components/ui/card"; import { LayerBadge } from "@/components/ui/badge"; import { CodeDiff } from "@/components/diff/code-diff"; import { ArrowLeft, Plus, Minus, FileCode, Wrench, Box, FunctionSquare } from "lucide-react"; import type { AgentVersion, VersionDiff, VersionIndex } from "@/types/agent-data"; import versionData from "@/data/generated/versions.json"; const data = versionData as VersionIndex; interface DiffPageContentProps { version: string; } export function DiffPageContent({ version }: DiffPageContentProps) { const locale = useLocale(); const meta = VERSION_META[version]; const { currentVersion, prevVersion, diff } = useMemo(() => { const current = data.versions.find((v) => v.id === version); const prevId = meta?.prevVersion; const prev = prevId ? data.versions.find((v) => v.id === prevId) : null; const d = data.diffs.find((d) => d.to === version); return { currentVersion: current, prevVersion: prev, diff: d }; }, [version, meta]); if (!meta || !currentVersion) { return (

Version not found.

Back to timeline
); } if (!prevVersion || !diff) { return (
Back to {meta.title}

{meta.title}

This is the first version -- there is no previous version to compare against.

); } const prevMeta = VERSION_META[prevVersion.id]; return (
Back to {meta.title} {/* Header */}

{prevMeta?.title || prevVersion.id} → {meta.title}

{prevVersion.id} ({prevVersion.loc} LOC) → {version} ({currentVersion.loc} LOC)

{/* Structural Diff */}
LOC Delta
= 0 ? "text-green-600 dark:text-green-400" : "text-red-600 dark:text-red-400"}> {diff.locDelta >= 0 ? "+" : ""}{diff.locDelta} lines
New Tools
{diff.newTools.length} {diff.newTools.length > 0 && (
{diff.newTools.map((tool) => ( {tool} ))}
)}
New Classes
{diff.newClasses.length} {diff.newClasses.length > 0 && (
{diff.newClasses.map((cls) => ( {cls} ))}
)}
New Functions
{diff.newFunctions.length} {diff.newFunctions.length > 0 && (
{diff.newFunctions.map((fn) => ( {fn} ))}
)}
{/* Version Info Comparison */}
{prevMeta?.title || prevVersion.id}

{prevMeta?.subtitle}

{prevVersion.loc} LOC

{prevVersion.tools.length} tools: {prevVersion.tools.join(", ")}

{prevVersion.layer}
{meta.title}

{meta.subtitle}

{currentVersion.loc} LOC

{currentVersion.tools.length} tools: {currentVersion.tools.join(", ")}

{currentVersion.layer}
{/* Code Diff */}

Source Code Diff

); } ================================================ FILE: web/src/app/[locale]/(learn)/[version]/diff/page.tsx ================================================ import { LEARNING_PATH } from "@/lib/constants"; import { DiffPageContent } from "./diff-content"; /** Emits one `{ version }` route-param object per entry in LEARNING_PATH. */ export function generateStaticParams() { return LEARNING_PATH.map((version) => ({ version })); } /** Async page for the diff route: awaits `params` and reads `version` from it. NOTE(review): the returned markup is missing from this extract. */ export default async function DiffPage({ params, }: { params: Promise<{ locale: string; version: string }>; }) { const { version } = await params; return ; } ================================================ FILE: web/src/app/[locale]/(learn)/[version]/page.tsx ================================================ import Link from "next/link"; import { LEARNING_PATH, VERSION_META, LAYERS } from "@/lib/constants"; import { LayerBadge } from "@/components/ui/badge"; import versionsData from "@/data/generated/versions.json"; import { VersionDetailClient } from "./client"; import { getTranslations } from "@/lib/i18n-server"; /** Emits one `{ version }` route-param object per entry in LEARNING_PATH. */ export function generateStaticParams() { return LEARNING_PATH.map((version) => ({ version })); } /** Async page for a single version. Looks up the version record by id, its VERSION_META entry, and the diff whose `to` is this version (null if none); returns a "Version not found" view when the record or the meta entry is missing. */ export default async function VersionPage({ params, }: { params: Promise<{ locale: string; version: string }>; }) { const { locale, version } = await params; const versionData = versionsData.versions.find((v) => v.id === version); const meta = VERSION_META[version]; const diff = versionsData.diffs.find((d) => d.to === version) ?? null; if (!versionData || !meta) { return (

Version not found

{version}

); } const t = getTranslations(locale, "version"); const tSession = getTranslations(locale, "sessions"); const tLayer = getTranslations(locale, "layer_labels"); const layer = LAYERS.find((l) => l.id === meta.layer); const pathIndex = LEARNING_PATH.indexOf(version as typeof LEARNING_PATH[number]); const prevVersion = pathIndex > 0 ? LEARNING_PATH[pathIndex - 1] : null; const nextVersion = pathIndex < LEARNING_PATH.length - 1 ? LEARNING_PATH[pathIndex + 1] : null; return (
{/* Header */}
{version}

{tSession(version) || meta.title}

{layer && ( {tLayer(layer.id)} )}

{meta.subtitle}

{versionData.loc} LOC {versionData.tools.length} {t("tools")} {meta.coreAddition && ( {meta.coreAddition} )}
{meta.keyInsight && (
{meta.keyInsight}
)}
{/* Client-rendered interactive sections */} {/* Prev / Next navigation */}
); } ================================================ FILE: web/src/app/[locale]/(learn)/compare/page.tsx ================================================ "use client"; import { useState, useMemo } from "react"; import { useLocale, useTranslations } from "@/lib/i18n"; import { LEARNING_PATH, VERSION_META } from "@/lib/constants"; import { Card, CardHeader, CardTitle } from "@/components/ui/card"; import { LayerBadge } from "@/components/ui/badge"; import { CodeDiff } from "@/components/diff/code-diff"; import { ArchDiagram } from "@/components/architecture/arch-diagram"; import { ArrowRight, FileCode, Wrench, Box, FunctionSquare } from "lucide-react"; import type { VersionIndex } from "@/types/agent-data"; import versionData from "@/data/generated/versions.json"; /** Generated versions.json, type-asserted (not validated at runtime) as VersionIndex. */ const data = versionData as VersionIndex; /** Client page comparing two versions chosen by the user. Both selections start as "", so nothing is compared until each one resolves to a record in versions.json. */ export default function ComparePage() { const t = useTranslations("compare"); const locale = useLocale(); const [versionA, setVersionA] = useState(""); const [versionB, setVersionB] = useState(""); const infoA = useMemo(() => data.versions.find((v) => v.id === versionA), [versionA]); const infoB = useMemo(() => data.versions.find((v) => v.id === versionB), [versionB]); const metaA = versionA ? VERSION_META[versionA] : null; const metaB = versionB ?
VERSION_META[versionB] : null; /* Differences of B relative to A; null until both records resolve. Tools are split into only-A / only-B / shared; classes and functions are compared by name and only those new in B are kept. NOTE(review): classesB and funcsB are built but never read in this callback. */ const comparison = useMemo(() => { if (!infoA || !infoB) return null; const toolsA = new Set(infoA.tools); const toolsB = new Set(infoB.tools); const onlyA = infoA.tools.filter((t) => !toolsB.has(t)); const onlyB = infoB.tools.filter((t) => !toolsA.has(t)); const shared = infoA.tools.filter((t) => toolsB.has(t)); const classesA = new Set(infoA.classes.map((c) => c.name)); const classesB = new Set(infoB.classes.map((c) => c.name)); const newClasses = infoB.classes.map((c) => c.name).filter((c) => !classesA.has(c)); const funcsA = new Set(infoA.functions.map((f) => f.name)); const funcsB = new Set(infoB.functions.map((f) => f.name)); const newFunctions = infoB.functions.map((f) => f.name).filter((f) => !funcsA.has(f)); return { locDelta: infoB.loc - infoA.loc, toolsOnlyA: onlyA, toolsOnlyB: onlyB, toolsShared: shared, newClasses, newFunctions, }; }, [infoA, infoB]); return (

{t("title")}

{t("subtitle")}

{/* Selectors */}
{/* Results */} {infoA && infoB && comparison && (
{/* Side-by-side version info */}
{metaA?.title || versionA}

{metaA?.subtitle}

{infoA.loc} LOC

{infoA.tools.length} tools

{metaA && {metaA.layer}}
{metaB?.title || versionB}

{metaB?.subtitle}

{infoB.loc} LOC

{infoB.tools.length} tools

{metaB && {metaB.layer}}
{/* Side-by-side Architecture Diagrams */}

{t("architecture")}

{metaA?.title || versionA}

{metaB?.title || versionB}

{/* Structural diff */}
{t("loc_delta")}
= 0 ? "text-green-600 dark:text-green-400" : "text-red-600 dark:text-red-400"}> {comparison.locDelta >= 0 ? "+" : ""}{comparison.locDelta} {t("lines")}
{t("new_tools_in_b")}
{comparison.toolsOnlyB.length} {comparison.toolsOnlyB.length > 0 && (
{comparison.toolsOnlyB.map((tool) => ( {tool} ))}
)}
{t("new_classes_in_b")}
{comparison.newClasses.length} {comparison.newClasses.length > 0 && (
{comparison.newClasses.map((cls) => ( {cls} ))}
)}
{t("new_functions_in_b")}
{comparison.newFunctions.length} {comparison.newFunctions.length > 0 && (
{comparison.newFunctions.map((fn) => ( {fn} ))}
)}
{/* Tool comparison */} {t("tool_comparison")}

{t("only_in")} {metaA?.title || versionA}

{comparison.toolsOnlyA.length === 0 ? (

{t("none")}

) : (
{comparison.toolsOnlyA.map((tool) => ( {tool} ))}
)}

{t("shared")}

{comparison.toolsShared.length === 0 ? (

{t("none")}

) : (
{comparison.toolsShared.map((tool) => ( {tool} ))}
)}

{t("only_in")} {metaB?.title || versionB}

{comparison.toolsOnlyB.length === 0 ? (

{t("none")}

) : (
{comparison.toolsOnlyB.map((tool) => ( {tool} ))}
)}
{/* Code Diff */}

{t("source_diff")}

)} {/* Empty state */} {(!versionA || !versionB) && (

{t("empty_hint")}

)}
); } ================================================ FILE: web/src/app/[locale]/(learn)/layers/page.tsx ================================================ "use client"; import Link from "next/link"; import { useTranslations, useLocale } from "@/lib/i18n"; import { LAYERS, VERSION_META } from "@/lib/constants"; import { Card, CardHeader, CardTitle } from "@/components/ui/card"; import { LayerBadge } from "@/components/ui/badge"; import { cn } from "@/lib/utils"; import { ChevronRight } from "lucide-react"; import type { VersionIndex } from "@/types/agent-data"; import versionData from "@/data/generated/versions.json"; /** Generated versions.json, type-asserted (not validated at runtime) as VersionIndex. */ const data = versionData as VersionIndex; /** Left-border color class per key (tools, planning, memory, concurrency, collaboration). NOTE(review): presumably keyed by layer id; the lookup site is not visible in this extract. */ const LAYER_BORDER_CLASSES: Record = { tools: "border-l-blue-500", planning: "border-l-emerald-500", memory: "border-l-purple-500", concurrency: "border-l-amber-500", collaboration: "border-l-red-500", }; /** Background color class for the same five keys. */ const LAYER_HEADER_BG: Record = { tools: "bg-blue-500", planning: "bg-emerald-500", memory: "bg-purple-500", concurrency: "bg-amber-500", collaboration: "bg-red-500", }; /** Client page listing every entry of LAYERS; for each layer it pairs the layer's version ids with their versions.json record and VERSION_META entry. */ export default function LayersPage() { const t = useTranslations("layers"); const locale = useLocale(); return (

{t("title")}

{t("subtitle")}

{LAYERS.map((layer, index) => { const versionInfos = layer.versions.map((vId) => { const info = data.versions.find((v) => v.id === vId); const meta = VERSION_META[vId]; return { id: vId, info, meta }; }); return (
{/* Layer header */}

L{index + 1} {" "} {layer.label}

{t(layer.id)}

{/* Version cards within this layer */}
{versionInfos.map(({ id, info, meta }) => (
{id} {layer.id}

{meta?.title || id}

{meta?.subtitle && (

{meta.subtitle}

)}
{info?.loc ?? "?"} LOC {info?.tools.length ?? "?"} tools
{meta?.keyInsight && (

{meta.keyInsight}

)}
))}
{/* Composition indicator */} {index < LAYERS.length - 1 && (
)}
); })}
); } ================================================ FILE: web/src/app/[locale]/(learn)/layout.tsx ================================================ import { Sidebar } from "@/components/layout/sidebar"; /** Layout for the (learn) route group; renders `children`. NOTE(review): Sidebar is imported, but its placement in the markup is not visible in this extract. */ export default function LearnLayout({ children, }: { children: React.ReactNode; }) { return (
{children}
); } ================================================ FILE: web/src/app/[locale]/(learn)/timeline/page.tsx ================================================ "use client"; import { useTranslations } from "@/lib/i18n"; import { Timeline } from "@/components/timeline/timeline"; /** Client page for the timeline route; its title and subtitle text come from the "timeline" translation namespace. */ export default function TimelinePage() { const t = useTranslations("timeline"); return (

{t("title")}

{t("subtitle")}

); } ================================================ FILE: web/src/app/[locale]/layout.tsx ================================================ import type { Metadata } from "next"; import { I18nProvider } from "@/lib/i18n"; import { Header } from "@/components/layout/header"; import en from "@/i18n/messages/en.json"; import zh from "@/i18n/messages/zh.json"; import ja from "@/i18n/messages/ja.json"; import "../globals.css"; /** Locale codes returned by generateStaticParams. */ const locales = ["en", "zh", "ja"]; /** Message bundles by locale code, read when building page metadata. */ const metaMessages: Record = { en, zh, ja }; /** Emits one `{ locale }` route-param object per entry in `locales`. */ export function generateStaticParams() { return locales.map((locale) => ({ locale })); } /** Builds page metadata from the locale's messages. Unknown locales fall back to the English bundle; a missing or empty meta title/description falls back to the hard-coded English strings. */ export async function generateMetadata({ params, }: { params: Promise<{ locale: string }>; }): Promise { const { locale } = await params; const messages = metaMessages[locale] || metaMessages.en; return { title: messages.meta?.title || "Learn Claude Code", description: messages.meta?.description || "Build an AI coding agent from scratch, one concept at a time", }; } export default async function RootLayout({ children, params, }: { children: React.ReactNode; params: Promise<{ locale: string }>; }) { const { locale } = await params; return (