Version not found.
Back to timelineThis is the first version -- there is no previous version to compare against.
{prevVersion.id} ({prevVersion.loc} LOC) → {version} ({currentVersion.loc} LOC)
{prevMeta?.subtitle}
{prevVersion.loc} LOC
{prevVersion.tools.length} tools: {prevVersion.tools.join(", ")}
{meta.subtitle}
{currentVersion.loc} LOC
{currentVersion.tools.length} tools: {currentVersion.tools.join(", ")}
{version}
{meta.subtitle}
{meta.keyInsight})}
{t("subtitle")}
{metaA?.subtitle}
{infoA.loc} LOC
{infoA.tools.length} tools
{metaA &&{metaB?.subtitle}
{infoB.loc} LOC
{infoB.tools.length} tools
{metaB &&{t("none")}
) : ({t("none")}
) : ({t("none")}
) : ({t("empty_hint")}
{t("subtitle")}
{t(layer.id)}
{meta.subtitle}
)}{meta.keyInsight}
)}{t("subtitle")}
{t("hero_subtitle")}
{t("core_pattern_desc")}
while
True
:
{"\n"}
{" "}response = client.messages.
create
(
messages=
messages
,
tools=
tools
)
{"\n"}
{" "}if
response.stop_reason !=
"tool_use"
:
{"\n"}
{" "}break
{"\n"}
{" "}for
tool_call
in
response.content
:
{"\n"}
{" "}result =
execute_tool
(
tool_call.name
,
tool_call.input
)
{"\n"}
{" "}messages.
append
(
result
)
{t("message_flow_desc")}
{t("learning_path_desc")}
{meta.keyInsight}
{t("layers_desc")}
{CLASS_DESCRIPTIONS[cls.name] || ""}
{description}
{decision.alternatives && ({decision.alternatives}
{lines.map((line, i) => (
{i + 1}
{highlightLine(line)}
))}
| {row.oldNum ?? ""} | {row.newNum ?? ""} | {row.type === "add" && +} {row.type === "remove" && -} | {row.text} |
| {row.left.num ?? ""} | {row.left.text} | {row.right.num ?? ""} | {row.right.text} |
+{diff.locDelta} lines
/g,
''
);
// Wrap plain pre>code (ASCII art / diagrams) in diagram container
html = html.replace(
/'
);
// Mark the first blockquote as hero callout
html = html.replace(
//,
''
);
// Remove the h1 (it's redundant with the page header)
html = html.replace(/.*?<\/h1>\n?/, "");
// Fix ordered list counter for interrupted lists (ol start="N")
html = html.replace(
//g,
(_, start) => ``
);
return html;
}
// Renders the generated markdown doc for a version, localized with an
// English fallback. Returns null when no doc exists for the version.
export function DocRenderer({ version }: DocRendererProps) {
  const locale = useLocale();
  // Prefer the doc matching the active locale; fall back to English.
  const doc = useMemo(() => {
    const match = docsData.find(
      (d: { version: string; locale: string }) =>
        d.version === version && d.locale === locale
    );
    if (match) return match;
    return docsData.find(
      (d: { version: string; locale: string }) =>
        d.version === version && d.locale === "en"
    );
  }, [version, locale]);
  // FIX: hooks must run unconditionally on every render. The original
  // early-returned (`if (!doc) return null`) BEFORE this useMemo, which
  // violates the Rules of Hooks and crashes when `doc` flips between
  // found/not-found across renders. The guard now lives inside the memo
  // and the early return happens after all hooks have run.
  const html = useMemo(() => {
    if (!doc) return "";
    const raw = renderMarkdown(doc.content);
    return postProcessHtml(raw);
  }, [doc]);
  if (!doc) return null;
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // presumably it renders `html` via dangerouslySetInnerHTML — confirm.
  return (
  );
}
================================================
FILE: web/src/components/layout/header.tsx
================================================
"use client";
import Link from "next/link";
import { usePathname } from "next/navigation";
import { useTranslations, useLocale } from "@/lib/i18n";
import { Github, Menu, X, Sun, Moon } from "lucide-react";
import { useState } from "react";
import { cn } from "@/lib/utils";
// Top-level navigation entries; `key` is presumably the i18n key under
// the "nav" namespace (see useTranslations("nav") in Header) — confirm.
const NAV_ITEMS = [
  { key: "timeline", href: "/timeline" },
  { key: "compare", href: "/compare" },
  { key: "layers", href: "/layers" },
] as const;
// Supported UI locales and the labels shown in the locale switcher.
const LOCALES = [
  { code: "en", label: "EN" },
  { code: "zh", label: "中文" },
  { code: "ja", label: "日本語" },
];
// Site header: brand link, nav, theme toggle, locale switcher, mobile menu.
export function Header() {
  const t = useTranslations("nav");
  const pathname = usePathname();
  const locale = useLocale();
  const [mobileOpen, setMobileOpen] = useState(false);
  // Lazy initializer: stored theme wins, else the OS color-scheme preference.
  // NOTE(review): reading localStorage during render can differ between
  // server and client output — confirm hydration behavior upstream.
  const [dark, setDark] = useState(() => {
    if (typeof window !== "undefined") {
      const stored = localStorage.getItem("theme");
      if (stored) return stored === "dark";
      return window.matchMedia("(prefers-color-scheme: dark)").matches;
    }
    return false;
  });
  // Flip the `dark` class on <html> and persist the choice.
  function toggleDark() {
    const next = !dark;
    setDark(next);
    document.documentElement.classList.toggle("dark", next);
    localStorage.setItem("theme", next ? "dark" : "light");
  }
  // Replace the locale segment of the path and do a full page navigation
  // (window.location, not router push) so locale-scoped data reloads.
  function switchLocale(newLocale: string) {
    const newPath = pathname.replace(`/${locale}`, `/${newLocale}`);
    window.location.href = newPath;
  }
  // NOTE(review): the JSX markup below appears stripped by extraction.
  return (
    Learn Claude Code
    {/* Desktop nav */}
    {/* Mobile hamburger */}
    {/* Mobile menu */}
    {mobileOpen && (
    )}
  );
}
================================================
FILE: web/src/components/layout/sidebar.tsx
================================================
"use client";
import Link from "next/link";
import { usePathname } from "next/navigation";
import { LAYERS, VERSION_META } from "@/lib/constants";
import { useTranslations } from "@/lib/i18n";
import { cn } from "@/lib/utils";
// FIX: `Record` requires two type arguments (they were lost in
// extraction); restored as Record<string, string>.
// Maps layer id -> Tailwind background class for the sidebar layer dot.
const LAYER_DOT_BG: Record<string, string> = {
  tools: "bg-blue-500",
  planning: "bg-emerald-500",
  memory: "bg-purple-500",
  concurrency: "bg-amber-500",
  collaboration: "bg-red-500",
};
// Left-hand navigation sidebar listing sessions grouped by layer.
export function Sidebar() {
  const pathname = usePathname();
  // First path segment is the locale; default to English.
  const locale = pathname.split("/")[1] || "en";
  const t = useTranslations("sessions");
  const tLayer = useTranslations("layer_labels");
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // locale/t/tLayer are presumably used in the stripped markup — confirm.
  return (
  );
}
================================================
FILE: web/src/components/simulator/agent-loop-simulator.tsx
================================================
"use client";
import { useRef, useEffect, useState } from "react";
import { AnimatePresence } from "framer-motion";
import { useTranslations } from "@/lib/i18n";
import { useSimulator } from "@/hooks/useSimulator";
import { SimulatorControls } from "./simulator-controls";
import { SimulatorMessage } from "./simulator-message";
import type { Scenario } from "@/types/agent-data";
const scenarioModules: Record Promise<{ default: Scenario }>> = {
s01: () => import("@/data/scenarios/s01.json") as Promise<{ default: Scenario }>,
s02: () => import("@/data/scenarios/s02.json") as Promise<{ default: Scenario }>,
s03: () => import("@/data/scenarios/s03.json") as Promise<{ default: Scenario }>,
s04: () => import("@/data/scenarios/s04.json") as Promise<{ default: Scenario }>,
s05: () => import("@/data/scenarios/s05.json") as Promise<{ default: Scenario }>,
s06: () => import("@/data/scenarios/s06.json") as Promise<{ default: Scenario }>,
s07: () => import("@/data/scenarios/s07.json") as Promise<{ default: Scenario }>,
s08: () => import("@/data/scenarios/s08.json") as Promise<{ default: Scenario }>,
s09: () => import("@/data/scenarios/s09.json") as Promise<{ default: Scenario }>,
s10: () => import("@/data/scenarios/s10.json") as Promise<{ default: Scenario }>,
s11: () => import("@/data/scenarios/s11.json") as Promise<{ default: Scenario }>,
s12: () => import("@/data/scenarios/s12.json") as Promise<{ default: Scenario }>,
};
// Props for AgentLoopSimulator: `version` selects a key of scenarioModules.
interface AgentLoopSimulatorProps {
  version: string;
}
// Step-through simulator of the agent loop for one session version.
export function AgentLoopSimulator({ version }: AgentLoopSimulatorProps) {
  const t = useTranslations("version");
  // FIX: type parameters were lost in extraction; restored so strict TS
  // accepts setScenario(mod.default) and scrollRef.current access.
  const [scenario, setScenario] = useState<Scenario | null>(null);
  const scrollRef = useRef<HTMLDivElement | null>(null);
  // Load the scenario module for this version, if one exists.
  // FIX: guard against a stale resolution — if `version` changes (or the
  // component unmounts) before the dynamic import resolves, the old
  // scenario must not clobber the new one.
  useEffect(() => {
    let cancelled = false;
    const loader = scenarioModules[version];
    if (loader) {
      loader().then((mod) => {
        if (!cancelled) setScenario(mod.default);
      });
    }
    return () => {
      cancelled = true;
    };
  }, [version]);
  const sim = useSimulator(scenario?.steps ?? []);
  // Auto-scroll to the newest message whenever a new step is revealed.
  useEffect(() => {
    if (scrollRef.current) {
      scrollRef.current.scrollTo({
        top: scrollRef.current.scrollHeight,
        behavior: "smooth",
      });
    }
  }, [sim.visibleSteps.length]);
  if (!scenario) return null;
  // NOTE(review): the JSX markup below appears stripped by extraction.
  return (
    {t("simulator")}
    {scenario.description}
    {sim.visibleSteps.length === 0 && (
      Press Play or Step to begin
    )}
    {sim.visibleSteps.map((step, i) => (
    ))}
  );
}
================================================
FILE: web/src/components/simulator/simulator-controls.tsx
================================================
"use client";
import { useTranslations } from "@/lib/i18n";
import { Play, Pause, SkipForward, RotateCcw } from "lucide-react";
import { cn } from "@/lib/utils";
// Contract between the simulator hook state and the playback toolbar.
interface SimulatorControlsProps {
  isPlaying: boolean;
  isComplete: boolean;
  currentIndex: number;
  totalSteps: number;
  speed: number;
  onPlay: () => void;
  onPause: () => void;
  onStep: () => void;
  onReset: () => void;
  onSpeedChange: (speed: number) => void;
}
// Playback speed multipliers offered in the toolbar.
const SPEEDS = [0.5, 1, 2, 4];
// Playback toolbar: play/pause, step, reset, speed picker, step counter.
export function SimulatorControls({
  isPlaying,
  isComplete,
  currentIndex,
  totalSteps,
  speed,
  onPlay,
  onPause,
  onStep,
  onReset,
  onSpeedChange,
}: SimulatorControlsProps) {
  const t = useTranslations("sim");
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // isComplete/speed and the on* handlers are presumably wired to the
  // stripped buttons — confirm upstream.
  return (
    {isPlaying ? (
    ) : (
    )}
    {t("speed")}:
    {SPEEDS.map((s) => (
    ))}
    {Math.max(0, currentIndex + 1)} {t("step_of")} {totalSteps}
  );
}
================================================
FILE: web/src/components/simulator/simulator-message.tsx
================================================
"use client";
import { motion } from "framer-motion";
import { cn } from "@/lib/utils";
import type { SimStep } from "@/types/agent-data";
import { User, Bot, Terminal, ArrowRight, AlertCircle } from "lucide-react";
// Props: the step to render and its index in the visible step list.
interface SimulatorMessageProps {
  step: SimStep;
  index: number;
}
// Visual config (icon, label, colors) per step type. Unknown types fall
// back to assistant_text in SimulatorMessage.
const TYPE_CONFIG: Record<
  string,
  { icon: typeof User; label: string; bgClass: string; borderClass: string }
> = {
  user_message: {
    icon: User,
    label: "User",
    bgClass: "bg-blue-50 dark:bg-blue-950/30",
    borderClass: "border-blue-200 dark:border-blue-800",
  },
  assistant_text: {
    icon: Bot,
    label: "Assistant",
    bgClass: "bg-zinc-50 dark:bg-zinc-900",
    borderClass: "border-zinc-200 dark:border-zinc-700",
  },
  tool_call: {
    icon: Terminal,
    label: "Tool Call",
    bgClass: "bg-amber-50 dark:bg-amber-950/30",
    borderClass: "border-amber-200 dark:border-amber-800",
  },
  tool_result: {
    icon: ArrowRight,
    label: "Tool Result",
    bgClass: "bg-emerald-50 dark:bg-emerald-950/30",
    borderClass: "border-emerald-200 dark:border-emerald-800",
  },
  system_event: {
    icon: AlertCircle,
    label: "System",
    bgClass: "bg-purple-50 dark:bg-purple-950/30",
    borderClass: "border-purple-200 dark:border-purple-800",
  },
};
// Renders one simulator step as a chat-style message card.
export function SimulatorMessage({ step, index }: SimulatorMessageProps) {
  // Unknown step types fall back to the assistant style.
  const config = TYPE_CONFIG[step.type] || TYPE_CONFIG.assistant_text;
  const Icon = config.icon;
  // NOTE(review): the JSX markup below appears stripped by extraction.
  return (
    {config.label}
    {step.toolName && (
      {step.toolName}
    )}
    {step.type === "tool_call" || step.type === "tool_result" ? (
      {step.content || "(empty)"}
    ) : step.type === "system_event" ? (
      {step.content}
    ) : (
      {step.content}
    )}
    {step.annotation}
  );
}
================================================
FILE: web/src/components/timeline/timeline.tsx
================================================
"use client";
import Link from "next/link";
import { motion } from "framer-motion";
import { useTranslations, useLocale } from "@/lib/i18n";
import { LEARNING_PATH, VERSION_META, LAYERS } from "@/lib/constants";
import { LayerBadge } from "@/components/ui/badge";
import { cn } from "@/lib/utils";
import versionsData from "@/data/generated/versions.json";
// FIX: `Record` requires two type arguments (lost in extraction);
// restored as Record<string, string>. Layer id -> timeline dot class.
const LAYER_DOT_BG: Record<string, string> = {
  tools: "bg-blue-500",
  planning: "bg-emerald-500",
  memory: "bg-purple-500",
  concurrency: "bg-amber-500",
  collaboration: "bg-red-500",
};
// FIX: `Record` requires two type arguments (lost in extraction);
// restored as Record<string, string>. Layer id -> connector line class
// (30% opacity variants of the dot colors).
const LAYER_LINE_BG: Record<string, string> = {
  tools: "bg-blue-500/30",
  planning: "bg-emerald-500/30",
  memory: "bg-purple-500/30",
  concurrency: "bg-amber-500/30",
  collaboration: "bg-red-500/30",
};
// FIX: `Record` requires two type arguments (lost in extraction);
// restored as Record<string, string>. Layer id -> LOC bar fill class.
const LAYER_BAR_BG: Record<string, string> = {
  tools: "bg-blue-500",
  planning: "bg-emerald-500",
  memory: "bg-purple-500",
  concurrency: "bg-amber-500",
  collaboration: "bg-red-500",
};
function getVersionData(id: string) {
return versionsData.versions.find((v) => v.id === id);
}
// Largest LOC among versions on the learning path; used to scale bars.
const MAX_LOC = Math.max(
  ...versionsData.versions.flatMap((v) =>
    (LEARNING_PATH as readonly string[]).includes(v.id) ? [v.loc] : []
  )
);
// Vertical learning-path timeline: legend, per-version cards, LOC chart.
export function Timeline() {
  const t = useTranslations("timeline");
  const tv = useTranslations("version");
  const locale = useLocale();
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // only text/expression children survive. `locale` is presumably used
  // in the stripped links — confirm upstream.
  return (
    {/* Layer Legend */}
    {t("layer_legend")}
    {LAYERS.map((layer) => (
      {layer.label}
    ))}
    {/* Vertical Timeline */}
    {LEARNING_PATH.map((versionId, index) => {
      const meta = VERSION_META[versionId];
      const data = getVersionData(versionId);
      if (!meta || !data) return null;
      const isLast = index === LEARNING_PATH.length - 1;
      // Bar width as a share of the largest version on the path.
      const locPercent = Math.round((data.loc / MAX_LOC) * 100);
      return (
        {/* Timeline line + dot */}
        {versionId.replace("s", "").replace("_mini", "m")}
        {!isLast && (
        )}
        {/* Content card */}
        {versionId}
        {meta.coreAddition}
        {meta.title}
        {meta.subtitle}
        {/* Stats row */}
        {data.loc} {tv("loc")}
        {data.tools.length} {tv("tools")}
        {/* LOC bar */}
        {/* Key insight */}
        {meta.keyInsight && (
          “{meta.keyInsight}”
        )}
        {/* Link */}
        {t("learn_more")}
      );
    })}
    {/* LOC Growth Chart */}
    {t("loc_growth")}
    {LEARNING_PATH.map((versionId) => {
      const meta = VERSION_META[versionId];
      const data = getVersionData(versionId);
      if (!meta || !data) return null;
      // Minimum 2% width so the smallest versions remain visible.
      const widthPercent = Math.max(
        2,
        Math.round((data.loc / MAX_LOC) * 100)
      );
      return (
        {versionId}
        {data.loc}
      );
    })}
  );
}
================================================
FILE: web/src/components/ui/badge.tsx
================================================
import { cn } from "@/lib/utils";
// Tailwind classes per architecture layer (light + dark variants).
const LAYER_COLORS = {
  tools:
    "bg-blue-100 text-blue-800 dark:bg-blue-900/30 dark:text-blue-300",
  planning:
    "bg-emerald-100 text-emerald-800 dark:bg-emerald-900/30 dark:text-emerald-300",
  memory:
    "bg-purple-100 text-purple-800 dark:bg-purple-900/30 dark:text-purple-300",
  concurrency:
    "bg-amber-100 text-amber-800 dark:bg-amber-900/30 dark:text-amber-300",
  collaboration:
    "bg-red-100 text-red-800 dark:bg-red-900/30 dark:text-red-300",
} as const;
// Props for LayerBadge; `layer` is constrained to the keys above.
interface BadgeProps {
  layer: keyof typeof LAYER_COLORS;
  children: React.ReactNode;
  className?: string;
}
// Badge colored by architecture layer.
export function LayerBadge({ layer, children, className }: BadgeProps) {
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // layer/className are presumably applied via cn() in the stripped
  // element — confirm upstream.
  return (
    {children}
  );
}
================================================
FILE: web/src/components/ui/card.tsx
================================================
import { cn } from "@/lib/utils";
// Props shared by Card and CardHeader, extending native div attributes.
// FIX: `React.HTMLAttributes` requires a type argument (lost in
// extraction); restored as HTMLDivElement — confirm against the original.
interface CardProps extends React.HTMLAttributes<HTMLDivElement> {
  children: React.ReactNode;
}
// Card container; spreads remaining div props onto the root element.
export function Card({ className, children, ...props }: CardProps) {
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // className/props are presumably applied to the stripped element.
  return (
    {children}
  );
}
// Header region of a Card; same prop contract as Card.
export function CardHeader({ className, children, ...props }: CardProps) {
  // NOTE(review): the JSX markup below appears stripped by extraction.
  return (
    {children}
  );
}
// Title element of a Card.
// FIX: `React.HTMLAttributes` requires a type argument (lost in
// extraction); restored as HTMLHeadingElement on the assumption the
// title renders a heading tag — confirm against the original source.
export function CardTitle({
  className,
  children,
  ...props
}: React.HTMLAttributes<HTMLHeadingElement>) {
  // NOTE(review): the JSX markup below appears stripped by extraction.
  return (
    {children}
  );
}
================================================
FILE: web/src/components/ui/tabs.tsx
================================================
"use client";
import { useState } from "react";
import { cn } from "@/lib/utils";
// Render-prop tabs: `children` receives the active tab id and returns
// the panel content for it.
interface TabsProps {
  tabs: { id: string; label: string }[];
  defaultTab?: string;
  children: (activeTab: string) => React.ReactNode;
  className?: string;
}
// Uncontrolled tab strip; tracks the active tab id in local state.
export function Tabs({ tabs, defaultTab, children, className }: TabsProps) {
  // Active tab: explicit default, else first tab, else empty string.
  const [active, setActive] = useState(defaultTab || tabs[0]?.id || "");
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // setActive is presumably wired to the stripped tab buttons.
  return (
    {tabs.map((tab) => (
    ))}
    {children(active)}
  );
}
================================================
FILE: web/src/components/visualizations/index.tsx
================================================
"use client";
import { lazy, Suspense } from "react";
import { useTranslations } from "@/lib/i18n";
const visualizations: Record<
string,
React.LazyExoticComponent>
> = {
s01: lazy(() => import("./s01-agent-loop")),
s02: lazy(() => import("./s02-tool-dispatch")),
s03: lazy(() => import("./s03-todo-write")),
s04: lazy(() => import("./s04-subagent")),
s05: lazy(() => import("./s05-skill-loading")),
s06: lazy(() => import("./s06-context-compact")),
s07: lazy(() => import("./s07-task-system")),
s08: lazy(() => import("./s08-background-tasks")),
s09: lazy(() => import("./s09-agent-teams")),
s10: lazy(() => import("./s10-team-protocols")),
s11: lazy(() => import("./s11-autonomous-agents")),
s12: lazy(() => import("./s12-worktree-task-isolation")),
};
// Renders the lazy visualization for a version, or nothing if none exists.
export function SessionVisualization({ version }: { version: string }) {
  const t = useTranslations("viz");
  const Component = visualizations[version];
  if (!Component) return null;
  // NOTE(review): the Suspense/JSX markup below appears stripped by
  // extraction (only fragments remain).
  return (
    }
    >
  );
}
================================================
FILE: web/src/components/visualizations/s01-agent-loop.tsx
================================================
"use client";
import { motion, AnimatePresence } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
import { useSvgPalette } from "@/hooks/useDarkMode";
// -- Flowchart node definitions --
// A flowchart node positioned by its center point (x, y) with size w x h.
interface FlowNode {
  id: string;
  label: string;
  x: number;
  y: number;
  w: number;
  h: number;
  type: "rect" | "diamond";
}
// The six nodes of the agent-loop flowchart.
const NODES: FlowNode[] = [
  { id: "start", label: "Start", x: 160, y: 30, w: 120, h: 40, type: "rect" },
  { id: "api_call", label: "API Call", x: 160, y: 110, w: 120, h: 40, type: "rect" },
  { id: "check", label: "stop_reason?", x: 160, y: 200, w: 140, h: 50, type: "diamond" },
  { id: "execute", label: "Execute Tool", x: 160, y: 300, w: 120, h: 40, type: "rect" },
  { id: "append", label: "Append Result", x: 160, y: 380, w: 120, h: 40, type: "rect" },
  { id: "end", label: "Break / Done", x: 380, y: 200, w: 120, h: 40, type: "rect" },
];
// Edges between nodes (SVG path data computed inline)
interface FlowEdge {
  from: string;
  to: string;
  label?: string;
}
const EDGES: FlowEdge[] = [
  { from: "start", to: "api_call" },
  { from: "api_call", to: "check" },
  { from: "check", to: "execute", label: "tool_use" },
  { from: "execute", to: "append" },
  { from: "append", to: "api_call" },
  { from: "check", to: "end", label: "end_turn" },
];
// Which nodes light up at each step (index = step number).
const ACTIVE_NODES_PER_STEP: string[][] = [
  [],
  ["start"],
  ["api_call"],
  ["check", "execute"],
  ["execute", "append"],
  ["api_call", "check", "execute", "append"],
  ["check", "end"],
];
// Which edges highlight at each step, keyed as "from->to".
const ACTIVE_EDGES_PER_STEP: string[][] = [
  [],
  [],
  ["start->api_call"],
  ["api_call->check", "check->execute"],
  ["execute->append"],
  ["append->api_call", "api_call->check", "check->execute", "execute->append"],
  ["api_call->check", "check->end"],
];
// -- Message blocks --
// One entry in the simulated messages[] panel.
interface MessageBlock {
  role: string;
  detail: string;
  colorClass: string;
}
// Messages appended at each step; nulls are skipped by the component.
const MESSAGES_PER_STEP: (MessageBlock | null)[][] = [
  [],
  [{ role: "user", detail: "Fix the login bug", colorClass: "bg-blue-500 dark:bg-blue-600" }],
  [],
  [{ role: "assistant", detail: "tool_use: read_file", colorClass: "bg-zinc-600 dark:bg-zinc-500" }],
  [{ role: "tool_result", detail: "auth.ts contents...", colorClass: "bg-emerald-500 dark:bg-emerald-600" }],
  [
    { role: "assistant", detail: "tool_use: edit_file", colorClass: "bg-zinc-600 dark:bg-zinc-500" },
    { role: "tool_result", detail: "file updated", colorClass: "bg-emerald-500 dark:bg-emerald-600" },
  ],
  [{ role: "assistant", detail: "end_turn: Done!", colorClass: "bg-purple-500 dark:bg-purple-600" }],
];
// -- Step annotations --
// Title + caption shown for each of the 7 steps.
const STEP_INFO = [
  { title: "The While Loop", desc: "Every agent is a while loop that keeps calling the model until it says 'stop'." },
  { title: "User Input", desc: "The loop starts when the user sends a message." },
  { title: "Call the Model", desc: "Send all messages to the LLM. It sees everything and decides what to do." },
  { title: "stop_reason: tool_use", desc: "The model wants to use a tool. The loop continues." },
  { title: "Execute & Append", desc: "Run the tool, append the result to messages[]. Feed it back." },
  { title: "Loop Again", desc: "Same code path, second iteration. The model decides to edit a file." },
  { title: "stop_reason: end_turn", desc: "The model is done. Loop exits. That's the entire agent." },
];
// -- Helpers --
function getNode(id: string): FlowNode {
return NODES.find((n) => n.id === id)!;
}
// Build the SVG path string for an edge between two flowchart nodes.
function edgePath(fromId: string, toId: string): string {
  const src = getNode(fromId);
  const dst = getNode(toId);
  // Loop-back edge (append -> api_call): route 50px left of the column,
  // up, then back in to the target's left side.
  if (fromId === "append" && toId === "api_call") {
    const sx = src.x - src.w / 2;
    const sy = src.y;
    const ex = dst.x - dst.w / 2;
    const ey = dst.y;
    return `M ${sx} ${sy} L ${sx - 50} ${sy} L ${ex - 50} ${ey} L ${ex} ${ey}`;
  }
  // Horizontal edge (check -> end): straight line between side midpoints.
  if (fromId === "check" && toId === "end") {
    const sx = src.x + src.w / 2;
    const sy = src.y;
    const ex = dst.x - dst.w / 2;
    const ey = dst.y;
    return `M ${sx} ${sy} L ${ex} ${ey}`;
  }
  // Default: vertical edge from bottom midpoint to top midpoint.
  const sx = src.x;
  const sy = src.y + src.h / 2;
  const ex = dst.x;
  const ey = dst.y - dst.h / 2;
  return `M ${sx} ${sy} L ${ex} ${ey}`;
}
// -- Component --
// Stepped visualization of the core agent while-loop (s01).
export default function AgentLoop({ title }: { title?: string }) {
  const {
    currentStep,
    totalSteps,
    next,
    prev,
    reset,
    isPlaying,
    toggleAutoPlay,
  } = useSteppedVisualization({ totalSteps: 7, autoPlayInterval: 2500 });
  const palette = useSvgPalette();
  // Highlight sets for the current step.
  const activeNodes = ACTIVE_NODES_PER_STEP[currentStep];
  const activeEdges = ACTIVE_EDGES_PER_STEP[currentStep];
  // Build accumulated messages up to the current step
  const visibleMessages: MessageBlock[] = [];
  for (let s = 0; s <= currentStep; s++) {
    for (const msg of MESSAGES_PER_STEP[s]) {
      if (msg) visibleMessages.push(msg);
    }
  }
  const stepInfo = STEP_INFO[currentStep];
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // palette/activeNodes/activeEdges/stepInfo and the step controls are
  // presumably used in the stripped SVG markup — confirm upstream.
  return (
    {title || "The Agent While-Loop"}
    {/* Left panel: SVG Flowchart (60%) */}
    while (stop_reason === "tool_use")
    {/* Right panel: messages[] array (40%) */}
    messages[]
    {visibleMessages.length === 0 && (
      [ empty ]
    )}
    {visibleMessages.map((msg, i) => (
      {msg.role}
      {msg.detail}
    ))}
    {/* Array index markers */}
    {visibleMessages.length > 0 && (
      length: {visibleMessages.length}
    )}
  );
}
================================================
FILE: web/src/components/visualizations/s02-tool-dispatch.tsx
================================================
"use client";
import { motion, AnimatePresence } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
import { useSvgPalette } from "@/hooks/useDarkMode";
// -- Tool definitions --
// Display metadata for one tool card (light + dark, idle + active).
interface ToolDef {
  name: string;
  desc: string;
  color: string;
  activeColor: string;
  darkColor: string;
  darkActiveColor: string;
}
// The four tools shown in the dispatch diagram.
const TOOLS: ToolDef[] = [
  {
    name: "bash",
    desc: "Execute shell commands",
    color: "border-orange-300 bg-orange-50",
    activeColor: "border-orange-500 bg-orange-100 ring-2 ring-orange-400",
    darkColor: "dark:border-zinc-700 dark:bg-zinc-800/50",
    darkActiveColor: "dark:border-orange-500 dark:bg-orange-950/40 dark:ring-orange-500",
  },
  {
    name: "read_file",
    desc: "Read file contents",
    color: "border-sky-300 bg-sky-50",
    activeColor: "border-sky-500 bg-sky-100 ring-2 ring-sky-400",
    darkColor: "dark:border-zinc-700 dark:bg-zinc-800/50",
    darkActiveColor: "dark:border-sky-500 dark:bg-sky-950/40 dark:ring-sky-500",
  },
  {
    name: "write_file",
    desc: "Create or overwrite a file",
    color: "border-emerald-300 bg-emerald-50",
    activeColor: "border-emerald-500 bg-emerald-100 ring-2 ring-emerald-400",
    darkColor: "dark:border-zinc-700 dark:bg-zinc-800/50",
    darkActiveColor: "dark:border-emerald-500 dark:bg-emerald-950/40 dark:ring-emerald-500",
  },
  {
    name: "edit_file",
    desc: "Apply targeted edits",
    color: "border-violet-300 bg-violet-50",
    activeColor: "border-violet-500 bg-violet-100 ring-2 ring-violet-400",
    darkColor: "dark:border-zinc-700 dark:bg-zinc-800/50",
    darkActiveColor: "dark:border-violet-500 dark:bg-violet-950/40 dark:ring-violet-500",
  },
];
// Per-step: which tool index is active (-1 = none, 4 = all)
const ACTIVE_TOOL_PER_STEP: number[] = [-1, 0, 1, 2, 3, 4];
// Incoming request JSON per step (null = no request shown).
const REQUEST_PER_STEP: (string | null)[] = [
  null,
  '{ name: "bash", input: { cmd: "ls -la" } }',
  '{ name: "read_file", input: { path: "src/auth.ts" } }',
  '{ name: "write_file", input: { path: "config.json" } }',
  '{ name: "edit_file", input: { path: "index.ts" } }',
  null,
];
// Step annotations
const STEP_INFO = [
  { title: "The Dispatch Map", desc: "A dictionary maps tool names to handler functions. The loop code never changes." },
  { title: "Route: bash", desc: "tool_call.name -> handlers['bash'](input). Name-based routing." },
  { title: "Route: read_file", desc: "Same pattern, different handler. Validate input, execute, return result." },
  { title: "Route: write_file", desc: "Every tool returns a tool_result that goes back into messages[]." },
  { title: "Route: edit_file", desc: "Adding a new tool = adding one entry to the dispatch map." },
  { title: "The Key Insight", desc: "The while loop stays the same. You only grow the dispatch map. That's it." },
];
// SVG layout constants
const SVG_WIDTH = 600;
const SVG_HEIGHT = 320;
const DISPATCHER_X = SVG_WIDTH / 2;
const DISPATCHER_Y = 60;
const DISPATCHER_W = 160;
const DISPATCHER_H = 50;
// Tool card row geometry (cards are centered as a group; see getCardX).
const CARD_Y = 230;
const CARD_W = 110;
const CARD_H = 65;
const CARD_GAP = 20;
// X coordinate of the center of the index-th tool card, with the whole
// row of cards centered horizontally in the SVG.
function getCardX(index: number): number {
  const rowWidth = TOOLS.length * CARD_W + (TOOLS.length - 1) * CARD_GAP;
  const leftEdge = (SVG_WIDTH - rowWidth) / 2;
  return leftEdge + index * (CARD_W + CARD_GAP) + CARD_W / 2;
}
// Stepped visualization of name-based tool dispatch (s02).
export default function ToolDispatch({ title }: { title?: string }) {
  const {
    currentStep,
    totalSteps,
    next,
    prev,
    reset,
    isPlaying,
    toggleAutoPlay,
  } = useSteppedVisualization({ totalSteps: 6, autoPlayInterval: 2500 });
  const palette = useSvgPalette();
  // Current step's highlighted tool (-1 none, 4 all) and request JSON.
  const activeToolIdx = ACTIVE_TOOL_PER_STEP[currentStep];
  const request = REQUEST_PER_STEP[currentStep];
  const stepInfo = STEP_INFO[currentStep];
  const isAllActive = activeToolIdx === 4;
  // NOTE(review): the JSX markup below appears stripped by extraction.
  return (
    {title || "Tool Dispatch Map"}
    {/* Incoming request display */}
    Incoming:
    {request && (
      {request}
    )}
    {!request && currentStep === 0 && (
      waiting for tool_call...
    )}
    {isAllActive && (
      All routes active
    )}
    {/* SVG dispatch diagram */}
    {/* Code snippet below the diagram */}
    const handlers = {"{"}
    {TOOLS.map((tool, i) => {
      const isActive = isAllActive || i === activeToolIdx;
      return (
        {" "}{tool.name},
      );
    })}
    {" }{"}{"}"};
  );
}
================================================
FILE: web/src/components/visualizations/s03-todo-write.tsx
================================================
"use client";
import { motion, AnimatePresence } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
// -- Task definitions --
// Lifecycle of a todo item on the kanban board.
type TaskStatus = "pending" | "in_progress" | "done";
interface Task {
  id: number;
  label: string;
  status: TaskStatus;
}
// Snapshot of all 4 tasks at each step
const TASK_STATES: Task[][] = [
  // Step 0: all pending
  [
    { id: 1, label: "Write auth tests", status: "pending" },
    { id: 2, label: "Fix mobile layout", status: "pending" },
    { id: 3, label: "Add error handling", status: "pending" },
    { id: 4, label: "Update config loader", status: "pending" },
  ],
  // Step 1: still all pending (idle round 1)
  [
    { id: 1, label: "Write auth tests", status: "pending" },
    { id: 2, label: "Fix mobile layout", status: "pending" },
    { id: 3, label: "Add error handling", status: "pending" },
    { id: 4, label: "Update config loader", status: "pending" },
  ],
  // Step 2: still all pending (idle round 2)
  [
    { id: 1, label: "Write auth tests", status: "pending" },
    { id: 2, label: "Fix mobile layout", status: "pending" },
    { id: 3, label: "Add error handling", status: "pending" },
    { id: 4, label: "Update config loader", status: "pending" },
  ],
  // Step 3: NAG fires, task 1 moves to in_progress
  [
    { id: 1, label: "Write auth tests", status: "in_progress" },
    { id: 2, label: "Fix mobile layout", status: "pending" },
    { id: 3, label: "Add error handling", status: "pending" },
    { id: 4, label: "Update config loader", status: "pending" },
  ],
  // Step 4: task 1 done
  [
    { id: 1, label: "Write auth tests", status: "done" },
    { id: 2, label: "Fix mobile layout", status: "pending" },
    { id: 3, label: "Add error handling", status: "pending" },
    { id: 4, label: "Update config loader", status: "pending" },
  ],
  // Step 5: task 2 self-directed to in_progress
  [
    { id: 1, label: "Write auth tests", status: "done" },
    { id: 2, label: "Fix mobile layout", status: "in_progress" },
    { id: 3, label: "Add error handling", status: "pending" },
    { id: 4, label: "Update config loader", status: "pending" },
  ],
  // Step 6: tasks 2,3 done, task 4 in_progress
  [
    { id: 1, label: "Write auth tests", status: "done" },
    { id: 2, label: "Fix mobile layout", status: "done" },
    { id: 3, label: "Add error handling", status: "done" },
    { id: 4, label: "Update config loader", status: "in_progress" },
  ],
];
// Nag timer value at each step (out of 3)
const NAG_TIMER_PER_STEP = [0, 1, 2, 3, 0, 0, 0];
const NAG_THRESHOLD = 3;
// Whether the nag fires at this step
const NAG_FIRES_PER_STEP = [false, false, false, true, false, false, false];
// Step annotations: title + caption per step.
const STEP_INFO = [
  { title: "The Plan", desc: "TodoWrite gives the model a visible plan. All tasks start as pending." },
  { title: "Round 1 -- Idle", desc: "The model does work but doesn't touch its todos. The nag counter increments." },
  { title: "Round 2 -- Still Idle", desc: "Two rounds without progress. Pressure builds." },
  { title: "NAG!", desc: "Threshold reached! System message injected: 'You have pending tasks. Pick one up now!'" },
  { title: "Task Complete", desc: "The model completes the task. Timer stays at 0 -- working on todos resets the counter." },
  { title: "Self-Directed", desc: "Once the model learns the pattern, it picks up tasks voluntarily." },
  { title: "Mission Accomplished", desc: "Visible plan + nag pressure = reliable task completion." },
];
// -- Column component --
// One status column of the kanban board, with a header and task cards.
function KanbanColumn({
  title,
  tasks,
  accentClass,
  headerBg,
}: {
  title: string;
  tasks: Task[];
  accentClass: string;
  headerBg: string;
}) {
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // accentClass/headerBg are presumably applied to the stripped elements.
  return (
    {title}
    {tasks.length}
    {tasks.map((task) => (
    ))}
    {tasks.length === 0 && (
      --
    )}
  );
}
// -- Task card --
// Single todo card, color-coded by its status.
function TaskCard({ task }: { task: Task }) {
  // FIX: `Record` requires two type arguments (lost in extraction);
  // restored as Record<TaskStatus, string> for both style maps.
  const statusStyles: Record<TaskStatus, string> = {
    pending: "bg-zinc-100 text-zinc-600 dark:bg-zinc-800 dark:text-zinc-400",
    in_progress: "bg-amber-100 text-amber-700 dark:bg-amber-900/40 dark:text-amber-300",
    done: "bg-emerald-100 text-emerald-700 dark:bg-emerald-900/40 dark:text-emerald-300",
  };
  const borderStyles: Record<TaskStatus, string> = {
    pending: "border-zinc-200 bg-white dark:border-zinc-700 dark:bg-zinc-800",
    in_progress: "border-amber-300 bg-amber-50 dark:border-amber-700 dark:bg-amber-950/30",
    done: "border-emerald-300 bg-emerald-50 dark:border-emerald-700 dark:bg-emerald-950/30",
  };
  // NOTE(review): the JSX markup below appears stripped by extraction.
  return (
    #{task.id}
    {task.status.replace("_", " ")}
    {task.label}
  );
}
// -- Nag gauge --
// Horizontal meter for the nag counter; escalates green -> yellow -> red.
function NagGauge({ value, max, firing }: { value: number; max: number; firing: boolean }) {
  // Fill percentage, clamped to 100.
  const pct = Math.min((value / max) * 100, 100);
  const barColor =
    value === 0
      ? "bg-zinc-300 dark:bg-zinc-600"
      : value === 1
        ? "bg-green-400 dark:bg-green-500"
        : value === 2
          ? "bg-yellow-400 dark:bg-yellow-500"
          : "bg-red-500 dark:bg-red-500";
  // NOTE(review): the JSX markup below appears stripped by extraction;
  // `pct` and `barColor` are presumably the bar's width/fill — confirm.
  return (
    Nag Timer
    {value}/{max}
    {firing && (
    )}
  );
}
// -- Main component --
// Stepped visualization of the TodoWrite nag system (s03).
export default function TodoWrite({ title }: { title?: string }) {
  const {
    currentStep,
    totalSteps,
    next,
    prev,
    reset,
    isPlaying,
    toggleAutoPlay,
  } = useSteppedVisualization({ totalSteps: 7, autoPlayInterval: 2500 });
  // Per-step snapshots drive the whole board.
  const tasks = TASK_STATES[currentStep];
  const nagValue = NAG_TIMER_PER_STEP[currentStep];
  const nagFires = NAG_FIRES_PER_STEP[currentStep];
  const stepInfo = STEP_INFO[currentStep];
  // Partition tasks into the three kanban columns.
  const pendingTasks = tasks.filter((t) => t.status === "pending");
  const inProgressTasks = tasks.filter((t) => t.status === "in_progress");
  const doneTasks = tasks.filter((t) => t.status === "done");
  // NOTE(review): the JSX markup below appears stripped by extraction.
  return (
    {title || "TodoWrite Nag System"}
    {/* Nag gauge + nag message */}
    {nagFires && (
      SYSTEM: "You have pending tasks. Pick one up now!"
    )}
    {/* Kanban board */}
    {/* Progress summary */}
    Progress: {doneTasks.length}/{tasks.length} complete
    {tasks.map((t) => (
    ))}
  );
}
================================================
FILE: web/src/components/visualizations/s04-subagent.tsx
================================================
"use client";
import { motion, AnimatePresence } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
// One colored message chip in a parent/child messages[] container.
interface MessageBlock {
  id: string;
  label: string;
  color: string;
}
// The parent agent's pre-existing conversation history.
const PARENT_BASE_MESSAGES: MessageBlock[] = [
  { id: "p1", label: "user: Build login + tests", color: "bg-blue-500" },
  { id: "p2", label: "assistant: Planning approach...", color: "bg-zinc-600" },
  { id: "p3", label: "tool_result: project structure", color: "bg-emerald-500" },
];
// The task description handed to the spawned child agent.
const TASK_PROMPT: MessageBlock = {
  id: "task",
  label: "task: Write unit tests for auth",
  color: "bg-purple-500",
};
// Work the child does in its own isolated context.
const CHILD_WORK_MESSAGES: MessageBlock[] = [
  { id: "c1", label: "tool_use: read auth.ts", color: "bg-amber-500" },
  { id: "c2", label: "tool_use: write test.ts", color: "bg-amber-500" },
];
// The compressed summary that returns to the parent.
const SUMMARY_BLOCK: MessageBlock = {
  id: "summary",
  label: "summary: 3 tests written, all passing",
  color: "bg-teal-500",
};
// Title + caption per visualization step.
const STEPS = [
  {
    title: "Parent Context",
    description:
      "The parent agent has accumulated messages from the conversation.",
  },
  {
    title: "Spawn Subagent",
    description:
      "Task tool creates a child with fresh messages[]. Only the task description is passed.",
  },
  {
    title: "Independent Work",
    description:
      "The child has its own context. It doesn't see the parent's history.",
  },
  {
    title: "Compress Result",
    description:
      "The child's full conversation compresses into one summary.",
  },
  {
    title: "Return Summary",
    description:
      "Only the summary returns. The child's full context is discarded.",
  },
  {
    title: "Clean Context",
    description:
      "The parent gets a clean summary without context bloat. This is fresh-context isolation via messages[].",
  },
];
// Six-step walkthrough of subagent context isolation: the parent spawns a
// child with a fresh messages[] array, the child works independently, and only
// a compressed summary returns to the parent.
// NOTE(review): the JSX in this return body looks stripped/garbled by
// extraction; verify against the original source file.
export default function SubagentIsolation({ title }: { title?: string }) {
const {
currentStep,
totalSteps,
next,
prev,
reset,
isPlaying,
toggleAutoPlay,
} = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 });
// Derive what to show in each container based on step
// Parent column: the base messages, plus the returned summary from step 5 on.
const parentMessages: MessageBlock[] = (() => {
const base = [...PARENT_BASE_MESSAGES];
if (currentStep >= 5) {
base.push(SUMMARY_BLOCK);
}
return base;
})();
// Child column per step: 1 = task only, 2 = task + work, 3 = collapsed to the
// summary while compressing, 4+ = full context shown again (faded/discarded).
const childMessages: MessageBlock[] = (() => {
if (currentStep < 1) return [];
if (currentStep === 1) return [TASK_PROMPT];
if (currentStep === 2) return [TASK_PROMPT, ...CHILD_WORK_MESSAGES];
if (currentStep === 3) return [SUMMARY_BLOCK];
return currentStep >= 4 ? [TASK_PROMPT, ...CHILD_WORK_MESSAGES] : [];
})();
// Presentation flags keyed to the current step.
const showChildEmpty = currentStep === 0;
const showArcToChild = currentStep === 1;
const showCompression = currentStep === 3;
const showArcToParent = currentStep === 4;
const childDiscarded = currentStep >= 4;
const childFaded = currentStep >= 4;
return (
{title || "Subagent Context Isolation"}
{/* Main layout: two containers side by side */}
{/* Parent Process Container */}
Parent Process
messages[]
{parentMessages.map((msg, i) => (
{msg.label}
))}
{currentStep >= 5 && (
3 original + 1 summary = clean context
)}
{/* Isolation Wall */}
= 1 && currentStep <= 4 ? 1 : 0.4,
}}
className="rounded bg-zinc-200 px-2 py-1 text-center font-mono text-[10px] text-zinc-500 dark:bg-zinc-700 dark:text-zinc-400"
style={{ writingMode: "vertical-rl", textOrientation: "mixed" }}
>
ISOLATION
{/* Child Process Container */}
Child Process
messages[] (fresh)
{showChildEmpty && (
not yet spawned
)}
{childMessages.map((msg) => (
{msg.label}
))}
{showCompression && (
Compressing full context into summary...
)}
{childDiscarded && (
context discarded
)}
{/* Animated arcs: task prompt going from parent to child */}
{showArcToChild && (
task prompt
)}
{showArcToParent && (
summary
)}
{/* Step Controls */}
);
}
================================================
FILE: web/src/components/visualizations/s05-skill-loading.tsx
================================================
"use client";
import { motion, AnimatePresence } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
// A slash-command skill: its one-line summary (layer 1, always in the system
// prompt) and its full instruction lines (layer 2, injected on demand).
interface SkillEntry {
name: string;
summary: string;
// Approximate token cost of the full SKILL.md content when loaded.
fullTokens: number;
content: string[];
}
const SKILLS: SkillEntry[] = [
{
name: "/commit",
summary: "Create git commits following repo conventions",
fullTokens: 320,
content: [
"1. Run git status + git diff to see changes",
"2. Analyze all staged changes and draft message",
"3. Create commit with Co-Authored-By trailer",
"4. Run git status after commit to verify",
],
},
{
name: "/review-pr",
summary: "Review pull requests for bugs and style",
fullTokens: 480,
content: [
"1. Fetch PR diff via gh pr view",
"2. Analyze changes file by file for issues",
"3. Check for bugs, security, and style problems",
"4. Post review comments with gh pr review",
],
},
{
name: "/test",
summary: "Run and analyze test suites",
fullTokens: 290,
content: [
"1. Detect test framework from package.json",
"2. Run test suite and capture output",
"3. Analyze failures and suggest fixes",
"4. Re-run after applying fixes",
],
},
{
name: "/deploy",
summary: "Deploy application to target environment",
fullTokens: 350,
content: [
"1. Verify all tests pass before deploy",
"2. Build production bundle",
"3. Push to deployment target via CI",
"4. Verify health check on deployed URL",
],
},
];
// Token-gauge reading per step (summaries-only baseline, then rising as skills load).
const TOKEN_STATES = [120, 120, 440, 440, 780, 780];
// Gauge scale maximum for the vertical token bar.
const MAX_TOKEN_DISPLAY = 1000;
// Title/description copy for each of the six visualization steps.
const STEPS = [
{
title: "Layer 1: Compact Summaries",
description:
"All skills are summarized in the system prompt. Compact, always present.",
},
{
title: "Skill Invocation",
description:
'The model recognizes a skill invocation and triggers the Skill tool.',
},
{
title: "Layer 2: Full Injection",
description:
"The full skill instructions are injected as a tool_result, not into the system prompt.",
},
{
title: "In Context Now",
description:
"The detailed instructions appear as if a tool returned them. The model follows them precisely.",
},
{
title: "Stack Skills",
description:
"Multiple skills can be loaded. Only summaries are permanent; full content comes and goes.",
},
{
title: "Two-Layer Architecture",
description:
"Layer 1: always present, tiny. Layer 2: loaded on demand, detailed. Elegant separation.",
},
];
// Six-step walkthrough of the two-layer skill system: compact summaries always
// in the system prompt (layer 1), full SKILL.md content injected on demand as
// tool_results (layer 2).
// NOTE(review): the JSX in this return body looks stripped/garbled by
// extraction; verify against the original source file.
export default function SkillLoading({ title }: { title?: string }) {
const {
currentStep,
totalSteps,
next,
prev,
reset,
isPlaying,
toggleAutoPlay,
} = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 });
// Gauge value for this step, and which skill row is highlighted (-1 = none):
// /commit (index 0) during steps 1-3, /review-pr (index 1) from step 4.
const tokenCount = TOKEN_STATES[currentStep];
const highlightedSkill = currentStep >= 1 && currentStep <= 3 ? 0 : currentStep >= 4 ? 1 : -1;
// Expanded SKILL.md panels: /commit from step 2, /review-pr from step 4; the
// first panel fades on the final overview step.
const showFirstContent = currentStep >= 2;
const showSecondContent = currentStep >= 4;
const firstContentFaded = currentStep >= 5;
return (
{title || "On-Demand Skill Loading"}
{/* Main content area */}
{/* System Prompt Block */}
System Prompt
always present
# Available Skills
{SKILLS.map((skill, i) => {
const isHighlighted = i === highlightedSkill;
return (
{skill.name}
{" - "}
{skill.summary}
);
})}
{/* User invocation indicator */}
{currentStep === 1 && (
User types:
/commit
)}
{currentStep === 4 && (
User types:
/review-pr
)}
{/* Connecting arrow */}
{(showFirstContent || showSecondContent) && (
)}
{/* Expanded Skill Content Blocks */}
{showFirstContent && (
SKILL.md: /commit
tool_result
{SKILLS[0].content.map((line, i) => (
{line}
))}
)}
{showSecondContent && (
SKILL.md: /review-pr
tool_result
{SKILLS[1].content.map((line, i) => (
{line}
))}
)}
{/* Mechanism annotation on step 3 */}
{currentStep === 3 && (
The Skill tool returns content as a tool_result message.
The model sees it in context and follows the instructions.
No system prompt bloat.
)}
{/* Final overview label on step 5 */}
{currentStep === 5 && (
LAYER 1
Always present, ~120 tokens
LAYER 2
On demand, ~300-500 tokens each
)}
{/* Token Gauge (vertical bar on the right) */}
Tokens
600
? "bg-amber-500"
: tokenCount > 300
? "bg-blue-500"
: "bg-emerald-500"
}`}
/>
{tokenCount}
{/* Step Controls */}
);
}
================================================
FILE: web/src/components/visualizations/s06-context-compact.tsx
================================================
"use client";
import { useMemo } from "react";
import { motion, AnimatePresence } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
// Message roles that can appear in the context window.
type BlockType = "user" | "assistant" | "tool_result";

// One rendered block in the context-window column.
interface ContextBlock {
  id: string;
  type: BlockType;
  label: string;
  tokens: number;
}

// FIX: both Record types below had lost their type arguments (a bare `Record`
// is a compile error: the generic requires two arguments) -- restored to
// Record<BlockType, string>.
const BLOCK_COLORS: Record<BlockType, string> = {
  user: "bg-blue-500",
  assistant: "bg-zinc-500 dark:bg-zinc-600",
  tool_result: "bg-emerald-500",
};

const BLOCK_LABELS: Record<BlockType, string> = {
  user: "USR",
  assistant: "AST",
  tool_result: "TRL",
};

/**
 * Generate `count` synthetic context blocks, cycling deterministically through
 * the three message types. `seed` offsets the type cycle and namespaces the
 * ids (`b-<seed>-<i>`) so different steps produce distinct, stable keys for
 * animation. tool_result blocks are deliberately the largest (4000-6000
 * tokens) so the micro-compact step has something visible to shrink.
 */
function generateBlocks(count: number, seed: number): ContextBlock[] {
  const types: BlockType[] = ["user", "assistant", "tool_result"];
  const blocks: ContextBlock[] = [];
  for (let i = 0; i < count; i++) {
    const typeIndex = (i + seed) % 3;
    const type = types[typeIndex];
    // tool_results: 4000/5000/6000; user + assistant: 1500/2000/2500/3000.
    const tokens = type === "tool_result" ? 4000 + (i % 3) * 1000 : 1500 + (i % 4) * 500;
    blocks.push({
      id: `b-${seed}-${i}`,
      type,
      label: `${BLOCK_LABELS[type]} ${i + 1}`,
      tokens,
    });
  }
  return blocks;
}
// Context window capacity (tokens) and rendered column height (px).
const MAX_TOKENS = 100000;
const WINDOW_HEIGHT = 350;
// Everything the visualization needs to render one step.
interface StepState {
blocks: { id: string; type: BlockType; label: string; heightPx: number; compressed?: boolean }[];
tokenCount: number;
fillPercent: number;
compressionLabel: string | null;
}
/**
 * Pure per-step model for the three-layer compression visualization.
 * Returns the stacked blocks (with pixel heights), the token count, the fill
 * percentage of the 100K window, and the compression-stage label (non-null
 * only on steps where a compression event happens).
 */
function computeStepState(step: number): StepState {
  switch (step) {
    // Steps 0/1/2/4 share one shape ("context keeps growing") and only differ
    // in block count, seed, token total, and minimum block height -- factored
    // into growingState() below.
    case 0:
      return growingState(8, 0, 30000, 16);
    case 1:
      return growingState(16, 0, 60000, 12);
    case 2:
      return growingState(20, 0, 80000, 10);
    case 3: {
      // Stage 1 (micro-compact): old tool_results collapse to 6px compressed
      // stubs, dropping usage from 80K back to 60K; other blocks keep the
      // same proportional height as step 2's 60%-full layout.
      const raw = generateBlocks(20, 0);
      const tokenCount = 60000;
      const totalRawTokens = raw.reduce((a, b) => a + b.tokens, 0);
      const blocks = raw.map((b) => ({
        ...b,
        heightPx:
          b.type === "tool_result"
            ? 6
            : Math.max(12, (b.tokens / totalRawTokens) * WINDOW_HEIGHT * 0.6),
        compressed: b.type === "tool_result",
      }));
      return {
        blocks,
        tokenCount,
        fillPercent: 60,
        compressionLabel: "MICRO-COMPACT",
      };
    }
    case 4:
      return growingState(24, 1, 85000, 10);
    case 5: {
      // Stage 2 (auto-compact): the whole conversation becomes one summary
      // block plus a handful of recent messages.
      const tokenCount = 25000;
      const summaryBlock = {
        id: "auto-summary",
        type: "assistant" as BlockType,
        label: "SUMMARY",
        heightPx: 40,
        compressed: false,
      };
      const recentBlocks = generateBlocks(4, 2).map((b) => ({
        ...b,
        heightPx: 20,
      }));
      return {
        blocks: [summaryBlock, ...recentBlocks],
        tokenCount,
        fillPercent: 25,
        compressionLabel: "AUTO-COMPACT",
      };
    }
    case 6: {
      // Stage 3 (/compact): user-triggered, deepest -- a single tiny summary.
      const tokenCount = 8000;
      const compactBlock = {
        id: "compact-summary",
        type: "assistant" as BlockType,
        label: "COMPACT SUMMARY",
        heightPx: 24,
        compressed: false,
      };
      return {
        blocks: [compactBlock],
        tokenCount,
        fillPercent: 8,
        compressionLabel: "/compact",
      };
    }
    default:
      return { blocks: [], tokenCount: 0, fillPercent: 0, compressionLabel: null };
  }
}

// Shared shape for the "growing" steps: `count` generated blocks whose pixel
// heights are proportional to their token share, scaled so the stack occupies
// tokenCount / MAX_TOKENS of the window, and never thinner than minPx.
// With a 100K window, fillPercent == tokenCount / 1000 and the height scale is
// fillPercent / 100 -- identical to the literals the original cases used.
function growingState(
  count: number,
  seed: number,
  tokenCount: number,
  minPx: number
): StepState {
  const raw = generateBlocks(count, seed);
  const fillPercent = tokenCount / 1000;
  const scale = fillPercent / 100;
  const totalRawTokens = raw.reduce((a, b) => a + b.tokens, 0);
  const blocks = raw.map((b) => ({
    ...b,
    heightPx: Math.max(minPx, (b.tokens / totalRawTokens) * WINDOW_HEIGHT * scale),
  }));
  return { blocks, tokenCount, fillPercent, compressionLabel: null };
}
// Title/description copy for each of the seven compression-walkthrough steps.
const STEPS = [
{
title: "Growing Context",
description:
"The context window holds the conversation. Each API call adds more messages.",
},
{
title: "Context Growing",
description:
"As the agent works, messages accumulate. The context window fills up.",
},
{
title: "Approaching Limit",
description:
"Old tool_results are the biggest consumers. Micro-compact targets these first.",
},
{
title: "Stage 1: Micro-Compact",
description:
"Replace old tool_results with short summaries. Automatic, transparent to the model.",
},
{
title: "Still Growing",
description:
"Work continues. Context grows again toward the threshold...",
},
{
title: "Stage 2: Auto-Compact",
description:
"Entire conversation summarized into a compact block. Triggered at token threshold.",
},
{
title: "Stage 3: /compact",
description:
"User-triggered, most aggressive. Three layers of strategic forgetting enable infinite sessions.",
},
];
// Seven-step walkthrough of the three compression layers (micro-compact,
// auto-compact, /compact) acting on a 100K-token context window.
// NOTE(review): the JSX in this return body looks stripped/garbled by
// extraction; verify against the original source file.
export default function ContextCompact({ title }: { title?: string }) {
const {
currentStep,
totalSteps,
next,
prev,
reset,
isPlaying,
toggleAutoPlay,
} = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 });
// Recompute the rendered model only when the step changes.
const state = useMemo(() => computeStepState(currentStep), [currentStep]);
// Gauge color: green when roomy, amber past 45%, red past 75%.
const fillColor =
state.fillPercent > 75
? "bg-red-500"
: state.fillPercent > 45
? "bg-amber-500"
: "bg-emerald-500";
const tokenDisplay = `${(state.tokenCount / 1000).toFixed(0)}K`;
return (
{title || "Three-Layer Context Compression"}
{/* Token Window (tall vertical bar on the left) */}
Context Window
{/* Blocks stacked from bottom up */}
{state.blocks.map((block) => (
{block.heightPx >= 14 && (
{block.label}
)}
))}
{/* Fill level line */}
{state.fillPercent}%
{/* Token count */}
{tokenDisplay}
/ 100K
{/* Right side: state display and compression stage */}
{/* Top: horizontal token bar */}
Token usage
{state.tokenCount.toLocaleString()} / {MAX_TOKENS.toLocaleString()}
{/* Message type legend */}
user
assistant
tool_result
{/* Highlight old tool_results at step 2 */}
{currentStep === 2 && (
tool_results are the largest blocks
File contents, command outputs, search results -- each one is thousands of tokens.
)}
{/* Compression stage label */}
{state.compressionLabel && (
{state.compressionLabel}
{currentStep === 3 && "Old tool_results shrunk to tiny summaries"}
{currentStep === 5 && "Full conversation compressed to summary block"}
{currentStep === 6 && "Most aggressive compression -- near-empty context"}
)}
{/* Three stages overview on final step */}
{currentStep === 6 && (
Stage 1: Micro -- shrink old tool_results
automatic
Stage 2: Auto -- summarize entire conversation
at threshold
Stage 3: /compact -- user-triggered, deepest compression
manual
)}
{/* Step Controls */}
);
}
================================================
FILE: web/src/components/visualizations/s07-task-system.tsx
================================================
"use client";
import { useMemo } from "react";
import { motion } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
import { useDarkMode, useSvgPalette } from "@/hooks/useDarkMode";
type TaskStatus = "pending" | "in_progress" | "completed" | "blocked";
// A node in the dependency graph: label, top-left SVG position, blocker ids.
interface TaskNode {
id: string;
label: string;
x: number;
y: number;
deps: string[];
}
interface StepInfo {
title: string;
description: string;
}
// Diamond-shaped DAG: T1 fans out to T2/T3, which join at T4, then T5.
const TASKS: TaskNode[] = [
{ id: "T1", label: "T1: Setup DB", x: 80, y: 160, deps: [] },
{ id: "T2", label: "T2: API routes", x: 280, y: 80, deps: ["T1"] },
{ id: "T3", label: "T3: Auth module", x: 280, y: 240, deps: ["T1"] },
{ id: "T4", label: "T4: Integration", x: 480, y: 160, deps: ["T2", "T3"] },
{ id: "T5", label: "T5: Deploy", x: 650, y: 160, deps: ["T4"] },
];
// Task node box dimensions in SVG pixels.
const NODE_W = 140;
const NODE_H = 50;
// Title/description copy for each of the 7 steps.
const STEP_INFO: StepInfo[] = [
{
title: "File-Based Tasks",
description:
"Tasks are stored in JSON files on disk. They survive context compaction -- unlike in-memory state.",
},
{
title: "Start T1",
description:
"Tasks without dependencies can start immediately. T1 has no blockers.",
},
{
title: "T1 Complete",
description: "Completing T1 unblocks its dependents: T2 and T3.",
},
{
title: "Parallel Work",
description:
"T2 and T3 have no dependency on each other. Both can run simultaneously.",
},
{
title: "Partial Unblock",
description:
"T4 depends on BOTH T2 and T3. It waits for all blockers to complete.",
},
{
title: "Fully Unblocked",
description: "All blockers resolved. T4 can now proceed.",
},
{
title: "Graph Resolved",
description:
"The entire dependency graph is resolved. File-based persistence means this works across context compressions.",
},
];
/**
 * Look up a task's status at a given visualization step.
 * Each task carries a precomputed status timeline indexed by step (0..6).
 * Unknown task ids or out-of-range steps fall back to "pending".
 */
function getTaskStatus(taskId: string, step: number): TaskStatus {
  // FIX: `Record` had lost its type arguments (a bare `Record` is a compile
  // error: the generic requires two arguments) -- restored to
  // Record<string, TaskStatus[]>. Timelines compacted to one row per task so
  // the step-by-step progression is readable at a glance.
  const statusMap: Record<string, TaskStatus[]> = {
    T1: ["pending", "in_progress", "completed", "completed", "completed", "completed", "completed"],
    T2: ["pending", "pending", "pending", "in_progress", "completed", "completed", "completed"],
    T3: ["pending", "pending", "pending", "in_progress", "in_progress", "completed", "completed"],
    T4: ["pending", "pending", "pending", "pending", "blocked", "in_progress", "completed"],
    T5: ["pending", "pending", "pending", "pending", "pending", "pending", "completed"],
  };
  return statusMap[taskId]?.[step] ?? "pending";
}
// An edge lights up once its source task is done and its target task has
// started (or also finished) -- i.e. the dependency has actually been used.
function isEdgeActive(fromId: string, toId: string, step: number): boolean {
  if (getTaskStatus(fromId, step) !== "completed") return false;
  const targetStatus = getTaskStatus(toId, step);
  return targetStatus === "in_progress" || targetStatus === "completed";
}
// Palette entry for a task node: light/dark fill, stroke, and text colors.
// The table is built inside the function, so each call hands back fresh
// objects exactly as the original per-case literals did.
function getStatusColor(status: TaskStatus) {
  const palettes = {
    pending: {
      fill: "#e2e8f0",
      darkFill: "#27272a",
      stroke: "#cbd5e1",
      darkStroke: "#3f3f46",
      text: "#475569",
      darkText: "#d4d4d8",
    },
    in_progress: {
      fill: "#fef3c7",
      darkFill: "#451a0340",
      stroke: "#f59e0b",
      darkStroke: "#d97706",
      text: "#b45309",
      darkText: "#fbbf24",
    },
    completed: {
      fill: "#d1fae5",
      darkFill: "#06402740",
      stroke: "#10b981",
      darkStroke: "#059669",
      text: "#047857",
      darkText: "#34d399",
    },
    blocked: {
      fill: "#fecaca",
      darkFill: "#45050540",
      stroke: "#ef4444",
      darkStroke: "#dc2626",
      text: "#dc2626",
      darkText: "#f87171",
    },
  };
  return palettes[status];
}
// Human-readable label shown inside a task node. "completed" is shortened to
// "done"; every other status displays its own name unchanged.
function getStatusLabel(status: TaskStatus): string {
  return status === "completed" ? "done" : status;
}
/**
 * SVG path for a cubic Bezier curve from (x1,y1) to (x2,y2) whose two control
 * points sit at the horizontal midpoint, giving a smooth S-curve between
 * graph nodes.
 */
function buildCurvePath(
  x1: number,
  y1: number,
  x2: number,
  y2: number
): string {
  const cx = (x1 + x2) / 2;
  const segments = [`M ${x1} ${y1}`, `C ${cx} ${y1},`, `${cx} ${y2},`, `${x2} ${y2}`];
  return segments.join(" ");
}
// Stepped DAG visualization: five tasks with dependencies resolve over seven
// steps, backed by a file-persisted task list (.tasks/tasks.json).
// NOTE(review): the JSX in this return body looks stripped/garbled by
// extraction; verify against the original source file.
export default function TaskSystem({ title }: { title?: string }) {
const {
currentStep,
totalSteps,
next,
prev,
reset,
isPlaying,
toggleAutoPlay,
} = useSteppedVisualization({ totalSteps: 7, autoPlayInterval: 2500 });
const isDark = useDarkMode();
const palette = useSvgPalette();
// Static edge geometry (computed once): one curve per dependency pair, from
// the right edge of the dep node to the left edge of the dependent node.
const edges = useMemo(() => {
const result: {
fromId: string;
toId: string;
x1: number;
y1: number;
x2: number;
y2: number;
}[] = [];
for (const task of TASKS) {
for (const depId of task.deps) {
const dep = TASKS.find((t) => t.id === depId);
if (!dep) continue;
result.push({
fromId: dep.id,
toId: task.id,
x1: dep.x + NODE_W,
y1: dep.y + NODE_H / 2,
x2: task.x,
y2: task.y + NODE_H / 2,
});
}
}
return result;
}, []);
const stepInfo = STEP_INFO[currentStep];
return (
{title || "Task Dependency Graph"}
{/* File persistence indicator */}
.tasks/tasks.json
Persisted to disk -- survives context compaction
{/* Legend */}
pending
in_progress
completed
blocked
);
}
================================================
FILE: web/src/components/visualizations/s08-background-tasks.tsx
================================================
"use client";
import { motion, AnimatePresence } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
import { useDarkMode, useSvgPalette } from "@/hooks/useDarkMode";
// Title + description copy for each of the seven timeline steps.
interface StepInfo {
title: string;
description: string;
}
const STEP_INFO: StepInfo[] = [
{
title: "Three Lanes",
description:
"The agent has a main thread and can spawn daemon background threads for parallel work.",
},
{
title: "Main Thread Working",
description:
"The main agent loop runs as usual, processing user requests.",
},
{
title: "Spawn Background",
description:
"Background tasks run as daemon threads. The main loop doesn't wait for them.",
},
{
title: "Multiple Backgrounds",
description: "Multiple background tasks can run concurrently.",
},
{
title: "Task Completes",
description:
"Background task finishes. Its result goes to the notification queue.",
},
{
title: "Queue Fills",
description:
"Results accumulate in the queue, invisible to the model during this turn.",
},
{
title: "Drain Queue",
description:
"Just before the next LLM call, all queued notifications are injected as tool_results. Non-blocking, async.",
},
];
// Vertical center (px) of each swimlane: main thread on top, two background lanes below.
const LANE_Y = {
main: 60,
bg1: 140,
bg2: 220,
} as const;
const LANE_HEIGHT = 44;
// Horizontal extent of the timeline in SVG coordinates.
const TIMELINE_LEFT = 160;
const TIMELINE_RIGHT = 720;
const TIMELINE_WIDTH = TIMELINE_RIGHT - TIMELINE_LEFT;
// Y position of the notification-queue row below the lanes.
const QUEUE_Y = 300;
// A bar of work on one lane spanning [startFraction, endFraction] of the
// timeline; it appears at `appearsAtStep` and optionally completes at
// `completesAtStep` (main-thread work never completes within the steps).
interface WorkBlock {
lane: "main" | "bg1" | "bg2";
startFraction: number;
endFraction: number;
color: string;
label?: string;
appearsAtStep: number;
completesAtStep?: number;
}
const WORK_BLOCKS: WorkBlock[] = [
{
lane: "main",
startFraction: 0,
endFraction: 1,
color: "#8b5cf6",
label: "Main agent loop",
appearsAtStep: 1,
},
{
lane: "bg1",
startFraction: 0.18,
endFraction: 0.75,
color: "#10b981",
label: "Run tests",
appearsAtStep: 2,
completesAtStep: 5,
},
{
lane: "bg2",
startFraction: 0.35,
endFraction: 0.58,
color: "#3b82f6",
label: "Lint code",
appearsAtStep: 3,
completesAtStep: 4,
},
];
// Fork arrows drawn from the main lane down to a background lane when a task spawns.
interface ForkArrow {
fromFraction: number;
toLane: "bg1" | "bg2";
appearsAtStep: number;
}
const FORK_ARROWS: ForkArrow[] = [
{ fromFraction: 0.18, toLane: "bg1", appearsAtStep: 2 },
{ fromFraction: 0.35, toLane: "bg2", appearsAtStep: 3 },
];
// Result cards that sit in the notification queue until drained into the next LLM call.
interface QueueCard {
id: string;
label: string;
appearsAtStep: number;
drainsAtStep: number;
}
const QUEUE_CARDS: QueueCard[] = [
{
id: "lint-result",
label: "Lint: 0 errors",
appearsAtStep: 4,
drainsAtStep: 6,
},
{
id: "test-result",
label: "Tests: 42 passed",
appearsAtStep: 5,
drainsAtStep: 6,
},
];
// Map a 0..1 timeline fraction to an absolute x coordinate in SVG pixels.
function fractionToX(fraction: number): number {
  return TIMELINE_WIDTH * fraction + TIMELINE_LEFT;
}
// How far along the timeline a work block's bar extends at a given step:
// zero length before it appears, full length once complete, and linearly
// interpolated (clamped to 1) while it is still growing. Blocks with no
// completesAtStep grow toward the final step (6).
function getBlockEndFraction(block: WorkBlock, step: number): number {
  const { startFraction, endFraction, appearsAtStep, completesAtStep } = block;
  if (step < appearsAtStep) return startFraction;
  if (completesAtStep !== undefined && step >= completesAtStep) return endFraction;
  const growthSteps = (completesAtStep ?? 6) - appearsAtStep;
  const progress = Math.min((step - appearsAtStep) / growthSteps, 1);
  return startFraction + (endFraction - startFraction) * progress;
}
// Swimlane timeline of the main agent loop plus daemon background tasks, with
// a notification queue that drains into the next LLM call.
// NOTE(review): the JSX in this return body looks stripped/garbled by
// extraction; verify against the original source file.
export default function BackgroundTasks({ title }: { title?: string }) {
const {
currentStep,
totalSteps,
next,
prev,
reset,
isPlaying,
toggleAutoPlay,
} = useSteppedVisualization({ totalSteps: 7, autoPlayInterval: 2500 });
const isDark = useDarkMode();
const palette = useSvgPalette();
const stepInfo = STEP_INFO[currentStep];
// Timeline position of the "next LLM call" boundary marker, shown from step 5.
const llmCallFraction = 0.82;
const showLlmMarker = currentStep >= 5;
return (
{title || "Background Task Lanes"}
{/* Legend */}
Main thread
Background 1
Background 2
LLM boundary
);
}
================================================
FILE: web/src/components/visualizations/s09-agent-teams.tsx
================================================
"use client";
import { motion, AnimatePresence } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
import { useSvgPalette } from "@/hooks/useDarkMode";
// -- Layout constants --
// Canvas size and agent-circle radius for the team diagram.
const SVG_W = 560;
const SVG_H = 340;
const AGENT_R = 40;
// Agent positions: inverted triangle (Lead top-center, Coder bottom-left, Reviewer bottom-right)
// Each agent owns a file-based mailbox (its .jsonl inbox).
const AGENTS = [
{ id: "lead", label: "Lead", cx: SVG_W / 2, cy: 70, inbox: "lead.jsonl" },
{ id: "coder", label: "Coder", cx: 140, cy: 230, inbox: "coder.jsonl" },
{ id: "reviewer", label: "Reviewer", cx: SVG_W - 140, cy: 230, inbox: "reviewer.jsonl" },
] as const;
// Inbox tray dimensions, positioned below each agent circle
const TRAY_W = 72;
const TRAY_H = 22;
const TRAY_OFFSET_Y = AGENT_R + 14;
// Message block dimensions
const MSG_W = 60;
const MSG_H = 20;
// Look up a team member by id. The non-null assertion mirrors the original:
// callers only pass ids that exist in the AGENTS tuple.
function agentById(id: string) {
  const match = AGENTS.find((agent) => agent.id === id);
  return match!;
}
// Center point of an agent's inbox tray (drawn TRAY_OFFSET_Y below its circle).
function trayCenter(id: string) {
  const { cx, cy } = agentById(id);
  return { x: cx, y: cy + (TRAY_OFFSET_Y + TRAY_H / 2) };
}
// Step configuration
// Title/description copy for each of the seven team-mailbox steps.
const STEPS = [
{ title: "The Team", desc: "Teams use a leader-worker pattern. Each teammate has a file-based mailbox inbox." },
{ title: "Lead Assigns Work", desc: "Communication is async: write a message to the recipient's .jsonl inbox file." },
{ title: "Read Inbox", desc: "Teammates poll their inbox before each LLM call. New messages become context." },
{ title: "Independent Work", desc: "Each teammate runs its own agent loop independently." },
{ title: "Pass Result", desc: "Results flow through the same mailbox mechanism. All communication is via files." },
{ title: "Feedback Loop", desc: "The mailbox pattern supports any communication topology: linear, broadcast, round-robin." },
{ title: "File-Based Coordination", desc: "No shared memory, no locks. All coordination through append-only files. Simple, robust, debuggable." },
];
// Helper: which agent circle is highlighted at each step -- the lead while
// assigning (step 1), the coder through read/work (steps 2-4), the reviewer
// when the result arrives (step 5). Nobody glows on the first or last step.
function agentGlows(agentId: string, step: number): boolean {
  switch (agentId) {
    case "lead":
      return step === 1;
    case "coder":
      return step >= 2 && step <= 4;
    case "reviewer":
      return step === 5;
    default:
      return false;
  }
}
// Helper: which inbox tray currently shows a resting message -- the coder's
// right after the lead writes to it (step 2), and the reviewer's once the
// result lands (step 5). At step 4 the result is still in flight, so the
// reviewer's tray stays empty.
function trayHasMessage(agentId: string, step: number): boolean {
  return (
    (agentId === "coder" && step === 2) ||
    (agentId === "reviewer" && step === 5)
  );
}
// Animated message that travels from one point to another
// Renders a labeled message chip animating from (fromX, fromY) to (toX, toY),
// optionally starting after `delay` seconds.
// NOTE(review): the JSX in this return body looks stripped by extraction;
// verify against the original source file.
function TravelingMessage({
fromX,
fromY,
toX,
toY,
label,
delay = 0,
}: {
fromX: number;
fromY: number;
toX: number;
toY: number;
label: string;
delay?: number;
}) {
return (
{label}
);
}
// Faded trace line between two agents
// Connects the inbox-tray centers of two agents to show a past message route.
// NOTE(review): the JSX in this return body looks stripped by extraction;
// verify against the original source file.
function TraceLine({ from, to, strokeColor }: { from: string; to: string; strokeColor: string }) {
const f = trayCenter(from);
const t = trayCenter(to);
return (
);
}
// Stepped visualization of a three-agent team coordinating via file-based
// .jsonl mailboxes (leader-worker pattern).
// NOTE(review): the JSX in this return body looks stripped by extraction;
// verify against the original source file.
export default function AgentTeams({ title }: { title?: string }) {
const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 });
const step = vis.currentStep;
const palette = useSvgPalette();
return (
{title || "Agent Team Mailboxes"}
{/* SVG visualization */}
{/* Step controls */}
);
}
================================================
FILE: web/src/components/visualizations/s10-team-protocols.tsx
================================================
"use client";
import { useState } from "react";
import { motion, AnimatePresence } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
import { useSvgPalette } from "@/hooks/useDarkMode";
// The two structured protocols this sequence diagram can display.
type Protocol = "shutdown" | "plan";
// -- Layout constants for the sequence diagram --
const SVG_W = 560;
const SVG_H = 360;
// X positions of the two participant lifelines and their vertical extent.
const LIFELINE_LEFT_X = 140;
const LIFELINE_RIGHT_X = 420;
const LIFELINE_TOP = 60;
const LIFELINE_BOTTOM = 330;
// Width of an activation bar drawn on a lifeline.
const ACTIVATION_W = 12;
// Y of the first message arrow and spacing between successive arrows.
const ARROW_Y_START = 110;
const ARROW_Y_GAP = 70;
// Request ID shown on message tags
const REQUEST_ID = "req_abc";
// -- Shutdown protocol step definitions --
const SHUTDOWN_STEPS = [
{ title: "Structured Protocols", desc: "Protocols define structured message exchanges with correlated request IDs." },
{ title: "Shutdown Request", desc: "The leader initiates shutdown. The request_id links the request to its response." },
{ title: "Teammate Decides", desc: "The teammate can accept or reject. It's not a forced kill -- it's a polite request." },
{ title: "Approved", desc: "Same request_id in the response. Teammate exits cleanly." },
];
// -- Plan approval protocol step definitions --
const PLAN_STEPS = [
{ title: "Plan Approval", desc: "Teammates in plan_mode must get approval before implementing changes." },
{ title: "Submit Plan", desc: "The teammate designs a plan and sends it to the leader for review." },
{ title: "Leader Reviews", desc: "Leader reviews and approves or rejects with feedback. Same request-response pattern." },
];
// Horizontal arrow between lifelines
// Draws one message in the sequence diagram: a line from the sender's
// activation bar to the receiver's, an arrow head, the message label, and an
// optional request-id tag used to correlate request/response pairs.
// NOTE(review): the JSX in this return body looks stripped by extraction;
// verify against the original source file.
function SequenceArrow({
y,
direction,
label,
tagLabel,
color,
tagBg,
tagStroke,
tagText,
}: {
y: number;
direction: "right" | "left";
label: string;
tagLabel?: string;
color: string;
tagBg?: string;
tagStroke?: string;
tagText?: string;
}) {
// Arrows attach to the edges of the 12px-wide activation bars rather than
// the lifeline centers; direction picks which lifeline is the sender.
const fromX = direction === "right" ? LIFELINE_LEFT_X + ACTIVATION_W / 2 : LIFELINE_RIGHT_X - ACTIVATION_W / 2;
const toX = direction === "right" ? LIFELINE_RIGHT_X - ACTIVATION_W / 2 : LIFELINE_LEFT_X + ACTIVATION_W / 2;
const arrowTip = direction === "right" ? toX - 6 : toX + 6;
const labelX = (fromX + toX) / 2;
return (
{/* Arrow line */}
{/* Arrow head */}
{/* Message label */}
{label}
{/* Request ID tag */}
{tagLabel && (
{tagLabel}
)}
);
}
// Decision diamond on a lifeline
// Small "?" decision marker with approve/reject branch labels, drawn where a
// participant chooses how to respond.
// NOTE(review): the JSX in this return body looks stripped by extraction;
// verify against the original source file.
function DecisionBox({ x, y }: { x: number; y: number }) {
const size = 14;
return (
?
approve
reject
);
}
// Activation bar on a lifeline
// Vertical bar marking the [yStart, yEnd] span during which a participant is active.
// NOTE(review): the JSX in this return body looks stripped by extraction;
// verify against the original source file.
function ActivationBar({
x,
yStart,
yEnd,
color,
}: {
x: number;
yStart: number;
yEnd: number;
color: string;
}) {
return (
);
}
// Toggleable sequence-diagram visualization of the two FSM team protocols
// (shutdown request/approve and plan submit/review).
// NOTE(review): the JSX in this return body looks stripped/garbled by
// extraction; verify against the original source file.
export default function TeamProtocols({ title }: { title?: string }) {
// NOTE(review): `useState("shutdown")` appears to have lost a type argument
// (likely `useState<Protocol>(...)` originally, consistent with the stripped
// `Record` generics elsewhere in this file) -- without it `protocol` is
// inferred as plain `string`; confirm against the original source.
const [protocol, setProtocol] = useState("shutdown");
const totalSteps = protocol === "shutdown" ? SHUTDOWN_STEPS.length : PLAN_STEPS.length;
const steps = protocol === "shutdown" ? SHUTDOWN_STEPS : PLAN_STEPS;
const vis = useSteppedVisualization({ totalSteps, autoPlayInterval: 2500 });
const step = vis.currentStep;
const palette = useSvgPalette();
// Switching protocols restarts the step sequence from the beginning.
const switchProtocol = (p: Protocol) => {
setProtocol(p);
vis.reset();
};
// Both protocols currently use the same participant labels.
const leftLabel = protocol === "shutdown" ? "Leader" : "Leader";
const rightLabel = protocol === "shutdown" ? "Teammate" : "Teammate";
return (
{title || "FSM Team Protocols"}
{/* Protocol toggle */}
{/* Sequence diagram SVG */}
{/* Step controls */}
);
}
================================================
FILE: web/src/components/visualizations/s11-autonomous-agents.tsx
================================================
"use client";
import { motion } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
import { useSvgPalette } from "@/hooks/useDarkMode";
// -- FSM states and their layout positions (diamond: idle top, poll right, claim bottom, work left) --
type Phase = "idle" | "poll" | "claim" | "work";
// Ring geometry for the FSM diagram: center, orbit radius, state-circle radius.
const FSM_CX = 110;
const FSM_CY = 110;
const FSM_R = 65;
const FSM_STATE_R = 22;
// Angles place the four states on the ring (SVG y grows downward, so -PI/2 is the top).
const FSM_STATES: { id: Phase; label: string; angle: number }[] = [
{ id: "idle", label: "idle", angle: -Math.PI / 2 },
{ id: "poll", label: "poll", angle: 0 },
{ id: "claim", label: "claim", angle: Math.PI / 2 },
{ id: "work", label: "work", angle: Math.PI },
];
// The cycle: idle -> poll -> claim -> work -> back to idle.
const FSM_TRANSITIONS: { from: Phase; to: Phase }[] = [
{ from: "idle", to: "poll" },
{ from: "poll", to: "claim" },
{ from: "claim", to: "work" },
{ from: "work", to: "idle" },
];
// Cartesian position of an FSM state node on the ring around (FSM_CX, FSM_CY).
function fsmPos(angle: number) {
  const x = FSM_CX + FSM_R * Math.cos(angle);
  const y = FSM_CY + FSM_R * Math.sin(angle);
  return { x, y };
}
const PHASE_COLORS: Record = {
idle: "#a1a1aa",
poll: "#f59e0b",
claim: "#3b82f6",
work: "#10b981",
};
// -- Task board data --
// One row on the shared task board; agents claim a row by writing their name as owner.
interface TaskRow {
id: string;
name: string;
status: "unclaimed" | "active" | "complete";
owner: string;
}
const INITIAL_TASKS: TaskRow[] = [
{ id: "T1", name: "Fix auth bug", status: "unclaimed", owner: "-" },
{ id: "T2", name: "Add rate limiter", status: "unclaimed", owner: "-" },
{ id: "T3", name: "Write tests", status: "unclaimed", owner: "-" },
{ id: "T4", name: "Update API docs", status: "unclaimed", owner: "-" },
];
// Agent positions around the task board (left panel)
const BOARD_CX = 140;
const BOARD_CY = 90;
const AGENT_ORBIT = 85;
const AGENT_R = 20;
// Three agents spread on the orbit: top, lower-right, lower-left.
const AGENT_ANGLES = [-Math.PI / 2, Math.PI / 6, (5 * Math.PI) / 6];
// Cartesian position of agent `index` on the orbit around the board center.
function agentPos(index: number) {
  const angle = AGENT_ANGLES[index];
  const x = BOARD_CX + AGENT_ORBIT * Math.cos(angle);
  const y = BOARD_CY + AGENT_ORBIT * Math.sin(angle);
  return { x, y };
}
// -- Step definitions --
// Title/description pairs driving the step controls; index == vis.currentStep.
const STEPS = [
{ title: "Self-Governing Agents", desc: "Autonomous agents need no coordinator. They govern themselves with an idle-poll-claim-work cycle." },
{ title: "Idle Timer", desc: "Each idle agent counts rounds. A timeout triggers self-directed task polling." },
{ title: "Poll Task Board", desc: "Timeout! The agent reads the task board looking for unclaimed work." },
{ title: "Claim Task", desc: "The agent writes its name to the task record. Atomic, no conflicts." },
{ title: "Work", desc: "The agent works on the claimed task using its own agent loop." },
{ title: "Independent Polling", desc: "Multiple agents poll and claim independently. No central coordinator needed." },
{ title: "Complete & Reset", desc: "Task done. Agent returns to idle. The cycle repeats." },
{ title: "Self-Organization", desc: "Three agents, zero coordination overhead. Polling + timeout = emergent organization." },
];
// Per-step state for each agent
interface AgentState {
// Current FSM phase of this agent at the step.
phase: Phase;
// Idle-timer ring fill fraction in [0, 1]; 0 hides the ring (see TimerRing).
timerFill: number;
// Accent color; getAgentStates() always derives this as PHASE_COLORS[phase].
color: string;
// Id of the task this agent has claimed ("T1".."T4"), or null when none.
taskClaim: string | null;
}
function getAgentStates(step: number): AgentState[] {
const idle: AgentState = { phase: "idle", timerFill: 0, color: PHASE_COLORS.idle, taskClaim: null };
switch (step) {
case 0:
return [
{ ...idle },
{ ...idle },
{ ...idle },
];
case 1:
return [
{ phase: "idle", timerFill: 0.6, color: PHASE_COLORS.idle, taskClaim: null },
{ ...idle },
{ ...idle },
];
case 2:
return [
{ phase: "poll", timerFill: 1.0, color: PHASE_COLORS.poll, taskClaim: null },
{ ...idle },
{ ...idle },
];
case 3:
return [
{ phase: "claim", timerFill: 0, color: PHASE_COLORS.claim, taskClaim: "T1" },
{ ...idle },
{ ...idle },
];
case 4:
return [
{ phase: "work", timerFill: 0, color: PHASE_COLORS.work, taskClaim: "T1" },
{ ...idle },
{ ...idle },
];
case 5:
return [
{ phase: "work", timerFill: 0, color: PHASE_COLORS.work, taskClaim: "T1" },
{ phase: "claim", timerFill: 0, color: PHASE_COLORS.claim, taskClaim: "T2" },
{ ...idle },
];
case 6:
return [
{ phase: "idle", timerFill: 0, color: PHASE_COLORS.idle, taskClaim: null },
{ phase: "work", timerFill: 0, color: PHASE_COLORS.work, taskClaim: "T2" },
{ ...idle },
];
case 7:
return [
{ phase: "idle", timerFill: 0, color: PHASE_COLORS.idle, taskClaim: null },
{ phase: "work", timerFill: 0, color: PHASE_COLORS.work, taskClaim: "T2" },
{ phase: "claim", timerFill: 0, color: PHASE_COLORS.claim, taskClaim: "T3" },
];
default:
return [{ ...idle }, { ...idle }, { ...idle }];
}
}
/**
 * Task-board snapshot for a given walkthrough step.
 *
 * Starts from INITIAL_TASKS (cloned, so the seed data is never mutated) and
 * applies the cumulative claims/completions that have happened by `step`.
 */
function getTaskStates(step: number): TaskRow[] {
  const board = INITIAL_TASKS.map((task) => ({ ...task }));
  // Mark a task as actively claimed by the named agent.
  const claim = (index: number, owner: string): void => {
    board[index].status = "active";
    board[index].owner = owner;
  };
  if (step >= 3) claim(0, "A");
  if (step >= 5) claim(1, "B");
  if (step >= 6) board[0].status = "complete"; // A finished T1 (owner stays "A")
  if (step >= 7) claim(2, "C");
  return board;
}
/**
 * Phase to highlight in the FSM diagram for a given step.
 * Steps at or below 1 are "idle"; steps beyond 7 fall through to "claim".
 */
function getActivePhase(step: number): Phase {
  if (step <= 1) return "idle";
  switch (step) {
    case 2:
      return "poll";
    case 3:
      return "claim";
    case 4:
    case 5:
      return "work";
    case 6:
      return "idle";
    default:
      return "claim";
  }
}
// Ring timer around an agent
// Draws a circular countdown indicator just outside an agent circle: `fill`
// in (0, 1] is the fraction of the arc shown (presumably via the
// stroke-dasharray/stroke-dashoffset technique, given the variables below).
// Returns null when the timer is empty so nothing renders.
// NOTE(review): the JSX return expression appears to have been stripped by an
// extraction step (empty parentheses) — recover the SVG markup from VCS.
function TimerRing({ cx, cy, r, fill }: { cx: number; cy: number; r: number; fill: number }) {
if (fill <= 0) return null;
// Ring sits 4px outside the circle of radius r.
const circumference = 2 * Math.PI * (r + 4);
// Offset hides the unfilled fraction of the arc.
const offset = circumference * (1 - fill);
return (
);
}
// FSM arrow between two states
// Computes the geometry for a curved connector between two FSM state circles:
// the endpoints are pulled in along the center-to-center direction so the line
// starts and ends at the circle edges (the target gets 6px extra clearance,
// presumably for an arrowhead), and (cx, cy) is a midpoint pushed 12px
// perpendicular to the chord, bowing the connector.
// NOTE(review): the JSX return expression appears to have been stripped by an
// extraction step (empty parentheses) — recover the SVG markup from VCS.
function FSMArrow({ from, to, active, inactiveStroke }: { from: Phase; to: Phase; active: boolean; inactiveStroke: string }) {
// Non-null assertions assume callers pass ids present in FSM_STATES, as the
// entries of FSM_TRANSITIONS do — TODO confirm no other call sites exist.
const fState = FSM_STATES.find((s) => s.id === from)!;
const tState = FSM_STATES.find((s) => s.id === to)!;
const fPos = fsmPos(fState.angle);
const tPos = fsmPos(tState.angle);
// Unit vector from source center to target center.
const dx = tPos.x - fPos.x;
const dy = tPos.y - fPos.y;
const dist = Math.sqrt(dx * dx + dy * dy);
const ux = dx / dist;
const uy = dy / dist;
// Trim endpoints to the circle boundaries (+6px arrowhead room at the target).
const x1 = fPos.x + ux * FSM_STATE_R;
const y1 = fPos.y + uy * FSM_STATE_R;
const x2 = tPos.x - ux * (FSM_STATE_R + 6);
const y2 = tPos.y - uy * (FSM_STATE_R + 6);
// Perpendicular offset (12px) for the curve's control point.
const perpX = -uy * 12;
const perpY = ux * 12;
const cx = (x1 + x2) / 2 + perpX;
const cy = (y1 + y2) / 2 + perpY;
return (
);
}
// Stepped visualization of the autonomous-agent idle/poll/claim/work cycle:
// a spatial panel (agents orbiting a task board) plus an FSM diagram, driven
// by useSteppedVisualization.
// NOTE(review): the JSX markup inside `return (...)` has been stripped by an
// extraction step; the remaining lines are text residue from the original
// elements — recover the full markup from VCS before editing the render tree.
export default function AutonomousAgents({ title }: { title?: string }) {
const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2500 });
const step = vis.currentStep;
// Theme-aware SVG colors; presumably consumed by the stripped SVG markup below.
const palette = useSvgPalette();
// Derived, render-ready state for the current step.
const agentStates = getAgentStates(step);
const tasks = getTaskStates(step);
const activePhase = getActivePhase(step);
const agentNames = ["A", "B", "C"];
return (
{title || "Autonomous Agent Cycle"}
{/* Left panel: spatial view with agents and task board */}
Spatial View
{/* Task table below the spatial view */}
Task
Status
Owner
{tasks.map((task) => (
{task.name}
{task.status}
{task.owner}
))}
{/* Right panel: FSM state machine diagram */}
FSM Cycle
{/* Legend */}
{FSM_STATES.map((s) => (
{s.label}
))}
{/* Step controls */}
);
}
================================================
FILE: web/src/components/visualizations/s12-worktree-task-isolation.tsx
================================================
"use client";
import { motion } from "framer-motion";
import { useSteppedVisualization } from "@/hooks/useSteppedVisualization";
import { StepControls } from "@/components/visualizations/shared/step-controls";
type TaskStatus = "pending" | "in_progress" | "completed";
// A row on the shared task board (.tasks).
interface TaskRow {
id: number;
subject: string;
status: TaskStatus;
// Name of the worktree lane bound to this task; "" when unbound.
worktree: string;
}
// An entry in the worktree index (.worktrees/index.json).
interface WorktreeRow {
name: string;
branch: string;
// Bound task reference as displayed, e.g. "#1".
task: string;
// Lane lifecycle: none -> active -> kept (retained after closeout) | removed.
state: "none" | "active" | "kept" | "removed";
}
// One execution lane (main, or a git worktree directory) and its changed files.
interface Lane {
name: string;
files: string[];
// Set on the lane the current step acts upon — presumably for visual emphasis.
highlight?: boolean;
}
// Full snapshot rendered for a single walkthrough step; steps are independent
// snapshots, not deltas.
interface StepState {
title: string;
desc: string;
tasks: TaskRow[];
worktrees: WorktreeRow[];
lanes: Lane[];
// Tool invocation(s) shown for this step; "\n"-separated when several.
op: string;
}
// Scripted walkthrough: each StepState is a complete snapshot (task board,
// worktree index, execution lanes, and the op that produced it), so rendering
// step N requires no reduction over earlier steps.
const STEPS: StepState[] = [
{
title: "Single Workspace Pain",
desc: "Two tasks are active, but both edits would hit one directory and collide.",
op: "task_create x2",
tasks: [
{ id: 1, subject: "Auth refactor", status: "in_progress", worktree: "" },
{ id: 2, subject: "UI login polish", status: "in_progress", worktree: "" },
],
worktrees: [],
lanes: [
{ name: "main", files: ["auth/service.py", "ui/Login.tsx"], highlight: true },
{ name: "wt/auth-refactor", files: [] },
{ name: "wt/ui-login", files: [] },
],
},
{
title: "Allocate Lane for Task 1",
desc: "Create a worktree lane and associate it with task 1 for clear ownership.",
op: "worktree_create(name='auth-refactor', task_id=1)",
tasks: [
{ id: 1, subject: "Auth refactor", status: "in_progress", worktree: "auth-refactor" },
{ id: 2, subject: "UI login polish", status: "in_progress", worktree: "" },
],
worktrees: [
{ name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "active" },
],
lanes: [
{ name: "main", files: ["ui/Login.tsx"] },
{ name: "wt/auth-refactor", files: ["auth/service.py"], highlight: true },
{ name: "wt/ui-login", files: [] },
],
},
{
title: "Allocate Lane for Task 2",
desc: "Lane creation and task association can be separate. Here task 2 binds after lane creation.",
op: "worktree_create(name='ui-login')\ntask_bind_worktree(task_id=2, worktree='ui-login')",
tasks: [
{ id: 1, subject: "Auth refactor", status: "in_progress", worktree: "auth-refactor" },
{ id: 2, subject: "UI login polish", status: "in_progress", worktree: "ui-login" },
],
worktrees: [
{ name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "active" },
{ name: "ui-login", branch: "wt/ui-login", task: "#2", state: "active" },
],
lanes: [
{ name: "main", files: [] },
{ name: "wt/auth-refactor", files: ["auth/service.py"] },
{ name: "wt/ui-login", files: ["ui/Login.tsx"], highlight: true },
],
},
{
title: "Run Commands in Isolated Lanes",
desc: "Each command routes by selected lane directory, not by the shared root.",
op: "worktree_run('auth-refactor', 'pytest tests/auth -q')",
tasks: [
{ id: 1, subject: "Auth refactor", status: "in_progress", worktree: "auth-refactor" },
{ id: 2, subject: "UI login polish", status: "in_progress", worktree: "ui-login" },
],
worktrees: [
{ name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "active" },
{ name: "ui-login", branch: "wt/ui-login", task: "#2", state: "active" },
],
lanes: [
{ name: "main", files: [] },
{ name: "wt/auth-refactor", files: ["auth/service.py", "tests/auth/test_login.py"], highlight: true },
{ name: "wt/ui-login", files: ["ui/Login.tsx", "ui/Login.css"] },
],
},
{
title: "Keep One Lane, Close Another",
desc: "Closeout can mix decisions: keep ui-login active for follow-up, remove auth-refactor and complete task 1.",
op: "worktree_keep('ui-login')\nworktree_remove('auth-refactor', complete_task=true)\nworktree_events(limit=10)",
tasks: [
{ id: 1, subject: "Auth refactor", status: "completed", worktree: "" },
{ id: 2, subject: "UI login polish", status: "in_progress", worktree: "ui-login" },
],
worktrees: [
{ name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "removed" },
{ name: "ui-login", branch: "wt/ui-login", task: "#2", state: "kept" },
],
lanes: [
{ name: "main", files: [] },
{ name: "wt/auth-refactor", files: [] },
{ name: "wt/ui-login", files: ["ui/Login.tsx"], highlight: true },
],
},
{
title: "Isolation + Coordination + Events",
desc: "The board tracks shared truth, worktree lanes isolate execution, and events provide auditable side-channel traces.",
op: "task_list + worktree_list + worktree_events",
tasks: [
{ id: 1, subject: "Auth refactor", status: "completed", worktree: "" },
{ id: 2, subject: "UI login polish", status: "in_progress", worktree: "ui-login" },
],
worktrees: [
{ name: "auth-refactor", branch: "wt/auth-refactor", task: "#1", state: "removed" },
{ name: "ui-login", branch: "wt/ui-login", task: "#2", state: "kept" },
],
lanes: [
{ name: "main", files: [] },
{ name: "wt/auth-refactor", files: [] },
{ name: "wt/ui-login", files: ["ui/Login.tsx"], highlight: true },
],
},
];
/** Tailwind badge classes (light + dark variants) for a task-status pill. */
function statusClass(status: TaskStatus): string {
  switch (status) {
    case "completed":
      return "bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-300";
    case "in_progress":
      return "bg-amber-100 text-amber-700 dark:bg-amber-900/30 dark:text-amber-300";
    default:
      // "pending" (and any other value) renders neutral zinc.
      return "bg-zinc-100 text-zinc-700 dark:bg-zinc-800 dark:text-zinc-300";
  }
}
/** Border/background classes for a worktree card, keyed by its lifecycle state. */
function worktreeClass(state: WorktreeRow["state"]): string {
  const byState: Record<WorktreeRow["state"], string> = {
    active: "border-emerald-300 bg-emerald-50 dark:border-emerald-800 dark:bg-emerald-900/20",
    kept: "border-sky-300 bg-sky-50 dark:border-sky-800 dark:bg-sky-900/20",
    removed: "border-zinc-200 bg-zinc-100 opacity-70 dark:border-zinc-700 dark:bg-zinc-800",
    none: "border-zinc-200 bg-white dark:border-zinc-700 dark:bg-zinc-900",
  };
  return byState[state];
}
// Stepped visualization of worktree-based task isolation: a task board, a
// worktree index, and per-lane file views, all read from the STEPS snapshots.
// NOTE(review): the JSX markup inside `return (...)` has been stripped by an
// extraction step; the remaining lines are text residue from the original
// elements — recover the full markup from VCS before editing the render tree.
export default function WorktreeTaskIsolation({ title }: { title?: string }) {
const vis = useSteppedVisualization({ totalSteps: STEPS.length, autoPlayInterval: 2600 });
// Entire render reads from this one snapshot — steps are self-contained.
const step = STEPS[vis.currentStep];
return (
{title || "Worktree Task Isolation"}
{step.op}
Task Board (.tasks)
{step.tasks.map((task) => (
#{task.id}
{task.status}
{task.subject}
worktree: {task.worktree || "-"}
))}
Worktree Index (.worktrees/index.json)
{step.worktrees.length === 0 && (
no worktrees yet
)}
{step.worktrees.map((wt) => (
{wt.name}
{wt.branch}
task: {wt.task}
))}
Execution Lanes
{step.lanes.map((lane) => (
{lane.name}
{lane.files.length === 0 ? (
(no changes)
) : (
lane.files.map((f) => {f})
)}
))}
{step.title}
{step.desc}
);
}
================================================
FILE: web/src/components/visualizations/shared/step-controls.tsx
================================================
"use client";
import { Play, Pause, SkipBack, SkipForward, RotateCcw } from "lucide-react";
import { cn } from "@/lib/utils";
// Props for the shared control bar used by the stepped visualizations.
interface StepControlsProps {
// Zero-based index of the current step.
currentStep: number;
totalSteps: number;
// Navigation callbacks (typically supplied by useSteppedVisualization).
onPrev: () => void;
onNext: () => void;
onReset: () => void;
// Auto-play state and its toggle.
isPlaying: boolean;
onToggleAutoPlay: () => void;
// Annotation text describing the current step.
stepTitle: string;
stepDescription: string;
// Optional extra classes merged onto the root element.
className?: string;
}
// Shared control bar: step annotation, prev/next/reset/auto-play buttons,
// a per-step dot indicator, and an "N/total" counter.
// NOTE(review): the JSX markup inside `return (...)` has been stripped by an
// extraction step; the remaining lines are text residue from the original
// elements — recover the full markup from VCS before editing the render tree.
export function StepControls({
currentStep,
totalSteps,
onPrev,
onNext,
onReset,
isPlaying,
onToggleAutoPlay,
stepTitle,
stepDescription,
className,
}: StepControlsProps) {
return (
{/* Annotation */}
{stepTitle}
{stepDescription}
{/* Controls */}
{/* Step indicator */}
{Array.from({ length: totalSteps }, (_, i) => (
))}
{currentStep + 1}/{totalSteps}
);
}
================================================
FILE: web/src/data/annotations/s01.json
================================================
{
"version": "s01",
"decisions": [
{
"id": "one-tool-sufficiency",
"title": "Why Bash Alone Is Enough",
"description": "Bash can read files, write files, run arbitrary programs, pipe data between processes, and manage the filesystem. Any additional tool (read_file, write_file, etc.) would be a strict subset of what bash already provides. Adding more tools doesn't unlock new capabilities -- it just adds surface area for confusion. The model has to learn fewer tool schemas, and the implementation stays under 100 lines. This is the minimal viable agent: one tool, one loop.",
"alternatives": "We could have started with a richer toolset (file I/O, HTTP, database), but that would obscure the core insight: an LLM with a shell is already a general-purpose agent. Starting minimal also makes it obvious what each subsequent version actually adds.",
"zh": {
"title": "为什么仅靠 Bash 就够了",
"description": "Bash 能读写文件、运行任意程序、在进程间传递数据、管理文件系统。任何额外的工具(read_file、write_file 等)都只是 bash 已有能力的子集。增加工具并不会解锁新能力,只会增加模型需要理解的接口。模型只需学习一个工具的 schema,实现代码不超过 100 行。这就是最小可行 agent:一个工具,一个循环。"
},
"ja": {
"title": "Bash だけで十分な理由",
"description": "Bash はファイルの読み書き、任意のプログラムの実行、プロセス間のデータパイプ、ファイルシステムの管理が可能です。追加のツール(read_file、write_file など)は bash が既に提供している機能の部分集合に過ぎません。ツールを増やしても新しい能力は得られず、モデルが理解すべきインターフェースが増えるだけです。モデルが学習するスキーマは1つだけで、実装は100行以内に収まります。これが最小限の実用的エージェント:1つのツール、1つのループです。"
}
},
{
"id": "process-as-subagent",
"title": "Recursive Process Spawning as Subagent Mechanism",
"description": "When the agent runs `python v0.py \"subtask\"`, it spawns a completely new process with a fresh LLM context. This child process is effectively a subagent: it has its own system prompt, its own conversation history, and its own task focus. When it finishes, the parent gets the stdout result. This is subagent delegation without any framework -- just Unix process semantics. Each child process naturally isolates concerns because it literally cannot see the parent's context.",
"alternatives": "A framework-level subagent system (like v3's Task tool) gives more control over what tools the subagent can access and how results are returned. But at v0, the point is to show that process spawning is the most primitive form of agent delegation -- no shared memory, no message passing, just stdin/stdout.",
"zh": {
"title": "用递归进程创建实现子代理机制",
"description": "当 agent 执行 `python v0.py \"subtask\"` 时,它会创建一个全新的进程,拥有全新的 LLM 上下文。这个子进程实际上就是一个子代理:有自己的系统提示词、对话历史和任务焦点。子进程完成后,父进程通过 stdout 获取结果。这就是不依赖任何框架的子代理委派——纯粹的 Unix 进程语义。每个子进程天然隔离关注点,因为它根本看不到父进程的上下文。"
},
"ja": {
"title": "再帰プロセス生成によるサブエージェント機構",
"description": "エージェントが `python v0.py \"subtask\"` を実行すると、新しい LLM コンテキストを持つ完全に新しいプロセスが生成されます。この子プロセスは事実上サブエージェントです:独自のシステムプロンプト、会話履歴、タスクフォーカスを持ちます。完了すると、親プロセスは stdout で結果を受け取ります。これはフレームワークなしのサブエージェント委任です——共有メモリもメッセージパッシングもなく、stdin/stdout だけです。各子プロセスは親のコンテキストを参照できないため、関心の分離が自然に実現されます。"
}
},
{
"id": "model-drives-everything",
"title": "No Planning Framework -- The Model Decides",
"description": "There is no planner, no task queue, no state machine. The system prompt tells the model how to approach problems, and the model decides what bash command to run next based on the conversation so far. This is intentional: at this level, adding a planning layer would be premature abstraction. The model's chain-of-thought IS the plan. The agent loop just keeps asking the model what to do until it stops requesting tools.",
"alternatives": "Later versions (v2) add explicit planning via TodoWrite. But v0 proves that implicit planning through the model's reasoning is sufficient for many tasks. The planning framework only becomes necessary when you need external visibility into the agent's intentions.",
"zh": {
"title": "没有规划框架——由模型自行决策",
"description": "没有规划器,没有任务队列,没有状态机。系统提示词告诉模型如何处理问题,模型根据对话历史决定下一步执行什么 bash 命令。这是有意为之的:在这个层级,添加规划层属于过早抽象。模型的思维链本身就是计划。agent 循环只是不断询问模型下一步做什么,直到模型不再请求工具为止。"
},
"ja": {
"title": "計画フレームワークなし——モデルが全てを決定",
"description": "プランナーもタスクキューも状態マシンもありません。システムプロンプトがモデルに問題の取り組み方を伝え、モデルがこれまでの会話に基づいて次に実行する bash コマンドを決定します。これは意図的な設計です:このレベルでは計画レイヤーの追加は時期尚早な抽象化です。モデルの思考の連鎖そのものが計画です。エージェントループはモデルがツールの呼び出しを止めるまで、次の行動を問い続けるだけです。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s02.json
================================================
{
"version": "s02",
"decisions": [
{
"id": "four-tools-not-twenty",
"title": "Why Exactly Four Tools",
"description": "The four tools are bash, read_file, write_file, and edit_file. Together they cover roughly 95% of coding tasks. Bash handles execution and arbitrary commands. Read_file provides precise file reading with line numbers. Write_file creates or overwrites files. Edit_file does surgical string replacement. More tools would increase the model's cognitive load -- it has to decide which tool to use, and more options means more chances of picking the wrong one. Fewer tools also means fewer tool schemas to maintain and fewer edge cases to handle.",
"alternatives": "We could add specialized tools (list_directory, search_files, http_request), and later versions do. But at this stage, bash already covers those use cases. The split from v0's single tool to v1's four tools is specifically about giving the model structured I/O for file operations, where bash's quoting and escaping often trips up the model.",
"zh": {
"title": "为什么恰好四个工具",
"description": "四个工具分别是 bash、read_file、write_file 和 edit_file,覆盖了大约 95% 的编程任务。Bash 处理执行和任意命令;read_file 提供带行号的精确文件读取;write_file 创建或覆盖文件;edit_file 做精确的字符串替换。工具越多,模型的认知负担越重——它必须在更多选项中做选择,选错的概率也随之增加。更少的工具也意味着更少的 schema 需要维护、更少的边界情况需要处理。"
},
"ja": {
"title": "なぜ正確に4つのツールなのか",
"description": "4つのツールは bash、read_file、write_file、edit_file です。これらでコーディングタスクの約95%をカバーします。Bash は実行と任意のコマンドを処理し、read_file は行番号付きの正確なファイル読み取りを提供し、write_file はファイルの作成・上書きを行い、edit_file は外科的な文字列置換を行います。ツールが増えるとモデルの認知負荷が増大し、どのツールを使うかの判断でミスが増えます。ツールが少ないことは、メンテナンスすべきスキーマとエッジケースの削減も意味します。"
}
},
{
"id": "model-as-agent",
"title": "The Model IS the Agent",
"description": "The core agent loop is trivially simple: while True, call the LLM, if it returns tool_use blocks then execute them and feed results back, if it returns only text then stop. There is no router, no decision tree, no workflow engine. The model itself decides what to do, when to stop, and how to recover from errors. The code is just plumbing that connects the model to tools. This is a philosophical stance: agent behavior emerges from the model, not from the framework.",
"alternatives": "Many agent frameworks add elaborate orchestration layers: ReAct loops with explicit Thought/Action/Observation parsing, LangChain-style chains, AutoGPT-style goal decomposition. These frameworks assume the model needs scaffolding to behave as an agent. Our approach assumes the model already knows how to be an agent -- it just needs tools to act on the world.",
"zh": {
"title": "模型本身就是代理",
"description": "核心 agent 循环极其简单:不断调用 LLM,如果返回 tool_use 块就执行并回传结果,如果只返回文本就停止。没有路由器,没有决策树,没有工作流引擎。模型自己决定做什么、何时停止、如何从错误中恢复。代码只是连接模型和工具的管道。这是一种设计哲学:agent 行为从模型中涌现,而非由框架定义。"
},
"ja": {
"title": "モデルそのものがエージェント",
"description": "コアのエージェントループは極めてシンプルです:LLM を呼び出し続け、tool_use ブロックが返されればそれを実行して結果をフィードバックし、テキストのみが返されれば停止します。ルーターも決定木もワークフローエンジンもありません。モデル自体が何をすべきか、いつ停止するか、エラーからどう回復するかを決定します。コードはモデルとツールを接続する配管に過ぎません。これは設計思想です:エージェントの振る舞いはフレームワークではなくモデルから創発するものです。"
}
},
{
"id": "explicit-tool-schemas",
"title": "JSON Schemas for Every Tool",
"description": "Each tool defines a strict JSON schema for its input parameters. For example, edit_file requires old_string and new_string as exact strings, not regex patterns. This eliminates an entire class of bugs: the model can't pass malformed input because the API validates against the schema before execution. It also makes the model's intent unambiguous -- when it calls edit_file with specific strings, there's no parsing ambiguity about what it wants to change.",
"alternatives": "Some agent systems let the model output free-form text that gets parsed with regex or heuristics (e.g., extracting code from markdown blocks). This is fragile -- the model might format output slightly differently and break the parser. JSON schemas trade flexibility for reliability.",
"zh": {
"title": "每个工具都有 JSON Schema",
"description": "每个工具都为输入参数定义了严格的 JSON schema。例如,edit_file 要求 old_string 和 new_string 是精确的字符串,而非正则表达式。这消除了一整类错误:模型无法传递格式错误的输入,因为 API 会在执行前校验 schema。这也使模型的意图变得明确——当它用特定字符串调用 edit_file 时,不存在关于它想修改什么的解析歧义。"
},
"ja": {
"title": "全ツールに JSON Schema を定義",
"description": "各ツールは入力パラメータに対して厳密な JSON Schema を定義しています。例えば edit_file は old_string と new_string を正確な文字列として要求し、正規表現は使いません。これにより一連のバグを排除できます:API がスキーマに対して実行前にバリデーションを行うため、モデルは不正な入力を渡せません。モデルの意図も明確になります――特定の文字列で edit_file を呼び出す際、何を変更したいかについて解析の曖昧さがありません。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s03.json
================================================
{
"version": "s03",
"decisions": [
{
"id": "visible-planning",
"title": "Making Plans Visible via TodoWrite",
"description": "Instead of letting the model plan silently in its chain-of-thought, we force plans to be externalized through the TodoWrite tool. Each plan item has a status (pending, in_progress, completed) that gets tracked explicitly. This has three benefits: (1) users can see what the agent intends to do before it does it, (2) developers can debug agent behavior by inspecting the plan state, (3) the agent itself can refer back to its plan in later turns when earlier context has scrolled away.",
"alternatives": "The model could plan internally via chain-of-thought reasoning (as it does in v0/v1). Internal planning works but is invisible and ephemeral -- once the thinking scrolls out of context, the plan is lost. Claude's extended thinking is another option, but it's not inspectable by the user or by downstream tools.",
"zh": {
"title": "通过 TodoWrite 让计划可见",
"description": "我们不让模型在思维链中默默规划,而是强制通过 TodoWrite 工具将计划外化。每个计划项都有可追踪的状态(pending、in_progress、completed)。这有三个好处:(1) 用户可以在执行前看到 agent 打算做什么;(2) 开发者可以通过检查计划状态来调试 agent 行为;(3) agent 自身可以在后续轮次中引用计划,即使早期上下文已经滚出窗口。"
},
"ja": {
"title": "TodoWrite による計画の可視化",
"description": "モデルが思考の連鎖の中で黙って計画するのではなく、TodoWrite ツールを通じて計画を外部化することを強制します。各計画項目には追跡可能なステータス(pending、in_progress、completed)があります。利点は3つ:(1) ユーザーがエージェントの意図を実行前に確認できる、(2) 開発者が計画状態を検査してデバッグできる、(3) エージェント自身が以前のコンテキストがスクロールアウトした後でも計画を参照できる。"
}
},
{
"id": "single-in-progress",
"title": "Only One Task Can Be In-Progress",
"description": "The TodoWrite tool enforces that at most one task has status 'in_progress' at any time. If the model tries to start a second task, it must first complete or abandon the current one. This constraint prevents a subtle failure mode: models that try to 'multitask' by interleaving work on multiple items tend to lose track of state and produce half-finished results. Sequential focus produces higher quality than parallel thrashing.",
"alternatives": "Allowing multiple in-progress items would let the agent context-switch between tasks, which seems more flexible. In practice, LLMs handle context-switching poorly -- they lose track of which task they were working on and mix up details between tasks. The single-focus constraint is a guardrail that improves output quality.",
"zh": {
"title": "同一时间只允许一个任务进行中",
"description": "TodoWrite 工具强制要求任何时候最多只能有一个任务处于 in_progress 状态。如果模型想开始第二个任务,必须先完成或放弃当前任务。这个约束防止了一种隐蔽的失败模式:试图通过交替处理多个项目来'多任务'的模型,往往会丢失状态并产出半成品。顺序执行的专注度远高于并行切换。"
},
"ja": {
"title": "同時に進行中にできるタスクは1つだけ",
"description": "TodoWrite ツールは、同時に 'in_progress' 状態のタスクを最大1つに制限します。モデルが2つ目のタスクを開始しようとする場合、まず現在のタスクを完了または中断する必要があります。この制約は微妙な失敗モードを防ぎます:複数の項目を交互に処理して「マルチタスク」しようとするモデルは、状態を見失い中途半端な結果を生みがちです。逐次的な集中は並行的な切り替えよりも高品質な出力を生み出します。"
}
},
{
"id": "max-twenty-items",
"title": "Maximum of 20 Plan Items",
"description": "TodoWrite caps the plan at 20 items. This is a deliberate constraint against over-planning. Models tend to decompose tasks into increasingly fine-grained steps when unconstrained, producing 50-item plans where each step is trivial. Long plans are fragile: if step 15 fails, the remaining 35 steps may all be invalid. Short plans (under 20 items) stay at the right abstraction level and are easier to adapt when reality diverges from the plan.",
"alternatives": "No cap would give the model full flexibility, but in practice leads to absurdly detailed plans. A dynamic cap (proportional to task complexity) would be smarter but adds complexity. The fixed cap of 20 is a simple heuristic that works well empirically -- most real coding tasks can be expressed in 5-15 meaningful steps.",
"zh": {
"title": "计划项上限为 20 条",
"description": "TodoWrite 将计划项限制在 20 条以内。这是对过度规划的刻意约束。不加限制时,模型倾向于将任务分解成越来越细粒度的步骤,产出 50 条的计划,每一步都微不足道。冗长的计划很脆弱:如果第 15 步失败,剩下的 35 步可能全部作废。20 条以内的短计划保持在正确的抽象层级,更容易在现实偏离计划时做出调整。"
},
"ja": {
"title": "計画項目の上限は20個",
"description": "TodoWrite は計画を20項目に制限します。これは過度な計画に対する意図的な制約です。制約がないとモデルはタスクをどんどん細かいステップに分解し、各ステップが些末な50項目の計画を作りがちです。長い計画は脆弱です:ステップ15が失敗すると残りの35ステップは全て無効になりかねません。20項目以内の短い計画は適切な抽象度を保ち、現実が計画から逸脱した際の適応が容易です。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s04.json
================================================
{
"version": "s04",
"decisions": [
{
"id": "context-isolation",
"title": "Subagents Get Fresh Context, Not Shared History",
"description": "When a parent agent spawns a subagent via the Task tool, the subagent starts with a clean message history containing only the system prompt and the delegated task description. It does NOT inherit the parent's conversation. This is context isolation: the subagent can focus entirely on its specific subtask without being distracted by hundreds of messages from the parent's broader conversation. The result is returned to the parent as a single tool_result, collapsing potentially dozens of subagent turns into one concise answer.",
"alternatives": "Sharing the parent's full context would give the subagent more information, but it would also flood the subagent with irrelevant details. Context window is finite -- filling it with parent history leaves less room for the subagent's own work. Fork-based approaches (copy the parent context) are a middle ground but still waste tokens on irrelevant history.",
"zh": {
"title": "子代理获得全新上下文,而非共享历史",
"description": "当父代理通过 Task 工具创建子代理时,子代理从全新的消息历史开始,只包含系统提示词和委派的任务描述,不继承父代理的对话。这就是上下文隔离:子代理可以完全专注于特定子任务,不会被父代理长达数百条消息的对话干扰。结果作为单条 tool_result 返回给父代理,将子代理可能数十轮的交互压缩为一个简洁的回答。"
},
"ja": {
"title": "サブエージェントは共有履歴ではなく新しいコンテキストを取得",
"description": "親エージェントが Task ツールでサブエージェントを生成すると、サブエージェントはシステムプロンプトと委任されたタスク説明のみを含むクリーンなメッセージ履歴から開始します。親の会話は引き継ぎません。これがコンテキスト分離です:サブエージェントは親の広範な会話の何百ものメッセージに気を取られることなく、特定のサブタスクに完全に集中できます。結果は単一の tool_result として親に返され、サブエージェントの数十ターンが1つの簡潔な回答に凝縮されます。"
}
},
{
"id": "tool-filtering",
"title": "Explore Agents Cannot Write Files",
"description": "When spawning a subagent with the 'Explore' type, it receives only read-only tools: bash (with restrictions), read_file, and search tools. It cannot call write_file or edit_file. This implements the principle of least privilege: an agent tasked with 'find all usages of function X' doesn't need write access. Removing write tools eliminates the risk of accidental file modification during exploration, and it also narrows the tool space so the model makes better decisions with fewer options.",
"alternatives": "Giving all subagents full tool access is simpler to implement but violates least privilege. A permission-request system (subagent asks parent for write access) adds complexity and latency. Static tool filtering by role is the pragmatic middle ground -- simple to implement, effective at preventing accidents.",
"zh": {
"title": "Explore 代理不能写入文件",
"description": "创建 Explore 类型的子代理时,它只获得只读工具:bash(有限制)、read_file 和搜索工具,不能调用 write_file 或 edit_file。这实现了最小权限原则:一个被委派'查找函数 X 所有使用位置'的代理不需要写权限。移除写工具消除了探索过程中误修改文件的风险,同时缩小了工具空间,让模型在更少的选项中做出更好的决策。"
},
"ja": {
"title": "Explore エージェントはファイルを書き込めない",
"description": "Explore タイプのサブエージェントを生成すると、読み取り専用ツールのみが提供されます:bash(制限付き)、read_file、検索ツール。write_file や edit_file は使えません。これは最小権限の原則の実装です:「関数 X の全使用箇所を見つける」タスクに書き込み権限は不要です。書き込みツールを除外することで探索中の誤ったファイル変更リスクを排除し、ツール空間を狭めてモデルがより良い判断を下せるようにします。"
}
},
{
"id": "no-recursive-task",
"title": "Subagents Cannot Spawn Their Own Subagents",
"description": "The Task tool is not included in the subagent's tool set. A subagent must complete its work directly -- it cannot delegate further. This prevents infinite delegation loops: without this constraint, an agent could spawn a subagent that spawns another subagent, each one re-delegating the same task in slightly different words, consuming tokens without making progress. One level of delegation handles the vast majority of use cases. If a task is too complex for a single subagent, the parent should decompose it differently.",
"alternatives": "Allowing recursive delegation (bounded by depth) would handle deeply nested tasks but adds complexity and the risk of runaway token consumption. In practice, single-level delegation covers most real-world coding tasks. Multi-level delegation is addressed in later versions (v6+) through persistent team structures instead of recursive spawning.",
"zh": {
"title": "子代理不能再创建子代理",
"description": "Task 工具不包含在子代理的工具集中。子代理必须直接完成工作,不能继续委派。这防止了无限委派循环:没有这个约束,一个代理可能创建子代理,子代理又创建子代理,每一层都用略微不同的措辞重新委派同一任务,消耗 token 却毫无进展。一层委派足以处理绝大多数场景。如果任务对单个子代理来说太复杂,应该由父代理重新分解。"
},
"ja": {
"title": "サブエージェントは自身のサブエージェントを生成できない",
"description": "Task ツールはサブエージェントのツールセットに含まれません。サブエージェントは作業を直接完了しなければならず、さらなる委任はできません。これにより無限委任ループを防止します:この制約がなければ、エージェントがサブエージェントを生成し、そのサブエージェントがさらにサブエージェントを生成し、それぞれが微妙に異なる言葉で同じタスクを再委任してトークンを消費するだけで進捗しない可能性があります。一段階の委任で大多数のユースケースに対応できます。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s05.json
================================================
{
"version": "s05",
"decisions": [
{
"id": "tool-result-injection",
"title": "Skills Inject via tool_result, Not System Prompt",
"description": "When the agent invokes the Skill tool, the skill's content (a SKILL.md file) is returned as a tool_result in a user message, not injected into the system prompt. This is a deliberate caching optimization: the system prompt remains static across turns, which means API providers can cache it (Anthropic's prompt caching, OpenAI's system message caching). If skill content were in the system prompt, it would change every time a new skill is loaded, invalidating the cache. By putting dynamic content in tool_result, we keep the expensive system prompt cacheable while still getting skill knowledge into context.",
"alternatives": "Injecting skills into the system prompt is simpler and gives skills higher priority in the model's attention. But it breaks prompt caching (every skill load creates a new system prompt variant) and bloats the system prompt over time as skills accumulate. The tool_result approach keeps things cache-friendly at the cost of slightly lower attention priority.",
"zh": {
"title": "技能通过 tool_result 注入,而非系统提示词",
"description": "当 agent 调用 Skill 工具时,技能内容(SKILL.md 文件)作为 tool_result 在用户消息中返回,而非注入系统提示词。这是一个刻意的缓存优化:系统提示词在各轮次间保持静态,API 提供商可以缓存它(Anthropic 的 prompt caching、OpenAI 的 system message caching)。如果技能内容在系统提示词中,每次加载新技能都会使缓存失效。将动态内容放在 tool_result 中,既保持了昂贵的系统提示词可缓存,又让技能知识进入了上下文。"
},
"ja": {
"title": "スキルはシステムプロンプトではなく tool_result で注入",
"description": "エージェントが Skill ツールを呼び出すと、スキルの内容(SKILL.md ファイル)はシステムプロンプトへの注入ではなく、ユーザーメッセージ内の tool_result として返されます。これは意図的なキャッシュ最適化です:システムプロンプトはターン間で静的に保たれるため、API プロバイダーがキャッシュできます(Anthropic のプロンプトキャッシュ、OpenAI のシステムメッセージキャッシュ)。スキル内容がシステムプロンプト内にあると、新しいスキルをロードするたびにキャッシュが無効化されます。動的コンテンツを tool_result に配置することで、高コストなシステムプロンプトのキャッシュ可能性を維持しつつ、スキル知識をコンテキストに取り込めます。"
}
},
{
"id": "lazy-loading",
"title": "On-Demand Skill Loading Instead of Upfront",
"description": "Skills are not loaded at startup. The agent starts with only the skill names and descriptions (from frontmatter). When the agent decides it needs a specific skill, it calls the Skill tool, which loads the full SKILL.md body into context. This keeps the initial prompt small and focused. An agent solving a Python bug doesn't need the Kubernetes deployment skill loaded -- that would waste context window space and potentially confuse the model with irrelevant instructions.",
"alternatives": "Loading all skills upfront guarantees the model always has all knowledge available, but wastes tokens on irrelevant skills and may hit context limits. A recommendation system (model suggests skills, human approves) adds latency. Lazy loading lets the model self-serve the knowledge it needs, when it needs it.",
"zh": {
"title": "按需加载技能而非预加载",
"description": "技能不会在启动时加载。Agent 初始只拥有技能名称和描述(来自 frontmatter)。当 agent 判断需要特定技能时,调用 Skill 工具将完整的 SKILL.md 内容加载到上下文中。这保持了初始提示词的精简。一个正在修复 Python bug 的 agent 不需要加载 Kubernetes 部署技能——那会浪费上下文窗口空间,还可能用无关指令干扰模型。"
},
"ja": {
"title": "起動時ではなくオンデマンドでスキルを読み込み",
"description": "スキルは起動時に読み込まれません。エージェントは最初、スキルの名前と説明(フロントマターから)のみを持ちます。エージェントが特定のスキルが必要だと判断すると、Skill ツールを呼び出して完全な SKILL.md の内容をコンテキストに読み込みます。これにより初期プロンプトを小さく保ちます。Python のバグを修正しているエージェントに Kubernetes デプロイのスキルは不要です――コンテキストウィンドウの無駄遣いであり、無関係な指示でモデルを混乱させかねません。"
}
},
{
"id": "frontmatter-body-split",
"title": "YAML Frontmatter + Markdown Body in SKILL.md",
"description": "Each SKILL.md file has two parts: YAML frontmatter (name, description, globs) and a markdown body (the actual instructions). The frontmatter serves as metadata for the skill registry -- it's what gets listed when the agent asks 'what skills are available?' The body is the payload that gets loaded on demand. This separation means you can list 100 skills (reading only frontmatter, a few bytes each) without loading 100 full instruction sets (potentially thousands of tokens each).",
"alternatives": "A separate metadata file (skill.yaml + skill.md) would work but doubles the number of files. Embedding metadata in the markdown (as headings or comments) requires parsing the full file to extract metadata. Frontmatter is a well-established convention (Jekyll, Hugo, Astro) that keeps metadata and content co-located but separately parseable.",
"zh": {
"title": "SKILL.md 采用 YAML Frontmatter + Markdown 正文",
"description": "每个 SKILL.md 文件有两部分:YAML frontmatter(名称、描述、globs)和 markdown 正文(实际指令)。Frontmatter 作为技能注册表的元数据——当 agent 问'有哪些可用技能'时,展示的就是这些信息。正文是按需加载的有效负载。这种分离意味着可以列出 100 个技能(每个只读几字节的 frontmatter)而不必加载 100 套完整指令集(每套可能数千 token)。"
},
"ja": {
"title": "SKILL.md で YAML フロントマター + Markdown 本文",
"description": "各 SKILL.md ファイルは2つの部分で構成されます:YAML フロントマター(名前、説明、globs)と Markdown 本文(実際の指示)。フロントマターはスキルレジストリのメタデータとして機能し、エージェントが「どんなスキルが利用可能か」と問い合わせた際に一覧表示されます。本文はオンデマンドで読み込まれるペイロードです。この分離により、100個のスキル一覧表示(各数バイトのフロントマターのみ読み取り)が100個の完全な指示セット(各数千トークン)のロードなしに可能になります。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s06.json
================================================
{
"version": "s06",
"decisions": [
{
"id": "three-layer-compression",
"title": "Three-Layer Compression Strategy",
"description": "Context management uses three distinct layers, each with different cost/benefit profiles. (1) Microcompact runs every turn and is nearly free: it truncates tool_result blocks from older messages, stripping verbose command output that's no longer needed. (2) Auto_compact triggers when token count exceeds a threshold: it calls the LLM to generate a conversation summary, which is expensive but dramatically reduces context size. (3) Manual compact is user-triggered for explicit 'start fresh' moments. Layering these means the cheap operation runs constantly (keeping context tidy) while the expensive operation runs rarely (only when actually needed).",
"alternatives": "A single compression strategy (e.g., always summarize at 80% capacity) would be simpler but wasteful -- most of the time, microcompact alone keeps things manageable. A sliding window (drop oldest N messages) is cheap but loses important context. The three-layer approach gives the best token efficiency: cheap cleanup constantly, expensive summarization rarely.",
"zh": {
"title": "三层压缩策略",
"description": "上下文管理使用三个独立的层次,各有不同的成本收益比。(1) 微压缩每轮都运行,几乎零成本:它截断旧消息中的 tool_result 块,去除不再需要的冗长命令输出。(2) 自动压缩在 token 数超过阈值时触发:调用 LLM 生成对话摘要,代价高但能大幅缩减上下文。(3) 手动压缩由用户触发,用于明确的'重新开始'场景。分层意味着低成本操作持续运行(保持上下文整洁),而高成本操作很少触发(仅在真正需要时)。"
},
"ja": {
"title": "3層圧縮戦略",
"description": "コンテキスト管理は、異なるコスト・効果プロファイルを持つ3つの層を使用します。(1) マイクロコンパクトは毎ターン実行されほぼ無コスト:古いメッセージの tool_result ブロックを切り詰め、不要な冗長出力を除去します。(2) 自動コンパクトはトークン数が閾値を超えると発動:LLM を呼び出して会話の要約を生成し、コストは高いがコンテキストサイズを劇的に削減します。(3) 手動コンパクトはユーザーが明示的に「最初からやり直し」する時に使用します。この階層化により、安価な操作が常に実行され(コンテキストを整頓)、高価な操作はめったに実行されません(本当に必要な時のみ)。"
}
},
{
"id": "min-savings-threshold",
"title": "MIN_SAVINGS = 20,000 Tokens Before Compressing",
"description": "Auto_compact only triggers when the estimated savings (current tokens minus estimated summary size) exceed 20,000 tokens. Compression is not free: the summary itself consumes tokens, plus there's the API call cost to generate it. If the conversation is only 25,000 tokens, compressing might save 5,000 tokens but cost an API call and produce a summary that's less coherent than the original. The 20K threshold ensures compression only happens when the savings meaningfully exceed the overhead.",
"alternatives": "A percentage-based threshold (compress when context is 80% full) adapts to different context window sizes but doesn't account for the fixed cost of generating a summary. A fixed threshold of 10K would compress more aggressively but often isn't worth it. The 20K value was chosen empirically: it's the point where compression savings consistently outweigh the quality loss from summarization.",
"zh": {
"title": "最小节省量 = 20,000 Token 才触发压缩",
"description": "自动压缩仅在估算节省量(当前 token 数减去预估摘要大小)超过 20,000 token 时才触发。压缩不是免费的:摘要本身会消耗 token,还有生成摘要的 API 调用成本。如果对话只有 25,000 token,压缩可能节省 5,000 token,但需要一次 API 调用,且产出的摘要可能不如原文连贯。20K 的阈值确保只在节省量明显超过开销时才进行压缩。"
},
"ja": {
"title": "圧縮前に MIN_SAVINGS = 20,000 トークンが必要",
"description": "自動コンパクトは推定節約量(現在のトークン数マイナス推定要約サイズ)が20,000トークンを超えた場合にのみ発動します。圧縮は無料ではありません:要約自体がトークンを消費し、さらに生成のための API コール費用がかかります。会話が25,000トークンしかない場合、圧縮で5,000トークン節約できても、API コールが必要で元の会話より一貫性の低い要約になる可能性があります。20K の閾値は、節約量がオーバーヘッドを確実に上回る場合にのみ圧縮を実行することを保証します。"
}
},
{
"id": "summary-replaces-all",
"title": "Summary Replaces ALL Messages, Not Partial History",
"description": "When auto_compact fires, it generates a summary and replaces the ENTIRE message history with that summary. It does not keep the last N messages alongside the summary. This avoids a subtle coherence problem: if you keep recent messages plus a summary of older ones, the model sees two representations of overlapping content. The summary might say 'we decided to use approach X' while a recent message still shows the deliberation process, creating contradictory signals. A clean summary is a single coherent narrative.",
"alternatives": "Keeping the last 5-10 messages alongside the summary preserves recent detail and gives the model more to work with. But it creates the overlap problem described above, and makes the total context size less predictable. Some systems use a 'sliding window + summary' approach which works but requires careful tuning of the overlap region.",
"zh": {
"title": "摘要替换全部消息,而非保留部分历史",
"description": "自动压缩触发时,生成摘要并替换全部消息历史,不会在摘要旁保留最近的 N 条消息。这避免了一个微妙的连贯性问题:如果同时保留近期消息和旧消息的摘要,模型会看到重叠内容的两种表示。摘要可能说'我们决定使用方案 X',而近期消息仍在展示讨论过程,产生矛盾信号。干净的摘要是一个连贯的单一叙述。"
},
"ja": {
"title": "要約が部分的な履歴ではなく全メッセージを置換",
"description": "自動コンパクトが発動すると、要約を生成してメッセージ履歴の全体をその要約で置換します。要約と並べて直近 N 件のメッセージを保持することはしません。これにより微妙な一貫性の問題を回避します:直近のメッセージと古いメッセージの要約を併存させると、モデルは重複するコンテンツの2つの表現を見ることになります。要約が「アプローチ X を使うことに決めた」と言う一方で、直近のメッセージにはまだ検討過程が表示されているかもしれず、矛盾するシグナルを生じます。クリーンな要約は単一の一貫した物語です。"
}
},
{
"id": "transcript-archival",
"title": "Full Conversation Archived to JSONL on Disk",
"description": "Even though context is compressed in memory, the full uncompressed conversation is appended to a JSONL file on disk. Every message, every tool call, every result -- nothing is lost. This means compression is a lossy operation on the in-memory context but a lossless operation on the permanent record. Post-hoc analysis (debugging agent behavior, computing token usage, training data extraction) can always work from the complete transcript. The JSONL format is append-only, making it safe for concurrent writes and easy to stream-process.",
"alternatives": "Not archiving saves disk space but makes debugging hard -- when the agent makes a mistake, you can't see what it was 'thinking' 200 messages ago because that context was compressed away. Database storage (SQLite) would provide queryability but adds a dependency. JSONL is the simplest format that supports append-only writes and line-by-line processing.",
"zh": {
"title": "完整对话以 JSONL 格式归档到磁盘",
"description": "尽管上下文在内存中被压缩,完整的未压缩对话仍会追加到磁盘上的 JSONL 文件中。每条消息、每次工具调用、每个结果都不会丢失。压缩对内存上下文是有损操作,但对永久记录是无损的。事后分析(调试 agent 行为、计算 token 用量、提取训练数据)始终可以基于完整记录进行。JSONL 格式仅追加写入,对并发写入安全,易于流式处理。"
},
"ja": {
"title": "完全な会話を JSONL としてディスクに保存",
"description": "メモリ上でコンテキストが圧縮されても、完全な非圧縮会話はディスク上の JSONL ファイルに追記されます。全てのメッセージ、全てのツール呼び出し、全ての結果――何も失われません。圧縮はインメモリコンテキストに対しては不可逆ですが、永続記録に対しては可逆です。事後分析(エージェントの挙動デバッグ、トークン使用量の計算、学習データの抽出)は常に完全な記録から行えます。JSONL フォーマットは追記専用で、並行書き込みに安全であり行単位の処理が容易です。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s07.json
================================================
{
"version": "s07",
"decisions": [
{
"id": "file-based-persistence",
"title": "Tasks Stored as JSON Files, Not In-Memory",
"description": "Tasks are persisted as JSON files in a .tasks/ directory on the filesystem instead of being held in memory. This has three critical benefits: (1) Tasks survive process crashes -- if the agent dies mid-task, the task board is still on disk when it restarts. (2) Multiple agents can read and write to the same task directory, enabling multi-agent coordination without shared memory. (3) Humans can inspect and manually edit task files for debugging. The filesystem becomes the shared database.",
"alternatives": "In-memory storage (like v2's TodoWrite) is simpler and faster but loses state on crash and doesn't work across multiple agent processes. A proper database (SQLite, Redis) would provide ACID guarantees and better concurrency, but adds a dependency and operational complexity. Files are the zero-dependency persistence layer that works everywhere.",
"zh": {
"title": "任务存储为 JSON 文件,而非内存",
"description": "任务以 JSON 文件形式持久化在 .tasks/ 目录中,而非保存在内存里。这有三个关键好处:(1) 任务在进程崩溃后仍然存在——如果 agent 在任务中途崩溃,重启后任务板仍在磁盘上;(2) 多个 agent 可以读写同一任务目录,无需共享内存即可实现多代理协调;(3) 人类可以查看和手动编辑任务文件来调试。文件系统就是共享数据库。"
},
"ja": {
"title": "タスクをメモリではなく JSON ファイルとして保存",
"description": "タスクはメモリ内ではなく .tasks/ ディレクトリに JSON ファイルとして永続化されます。3つの重要な利点があります:(1) プロセスのクラッシュ後もタスクが存続する――エージェントがタスク途中でクラッシュしても、再起動時にタスクボードはディスク上に残っています。(2) 複数のエージェントが同じタスクディレクトリを読み書きでき、共有メモリなしにマルチエージェント連携が可能になります。(3) 人間がデバッグのためにタスクファイルを検査・手動編集できます。ファイルシステムが共有データベースになります。"
}
},
{
"id": "dependency-graph",
"title": "Tasks Have blocks/blockedBy Dependency Fields",
"description": "Each task can declare which other tasks it blocks (downstream dependents) and which tasks block it (upstream dependencies). An agent will not start a task that has unresolved blockedBy dependencies. This is essential for multi-agent coordination: when Agent A is writing the database schema and Agent B needs to write queries against it, Agent B's task is blockedBy Agent A's task. Without dependencies, both agents might start simultaneously and Agent B would work against a schema that doesn't exist yet.",
"alternatives": "Simple priority ordering (high/medium/low) doesn't capture 'task B literally cannot start until task A finishes.' A centralized coordinator that assigns tasks in order would work but creates a single point of failure and bottleneck. Declarative dependencies let each agent independently determine what it can work on by reading the task files.",
"zh": {
"title": "任务具有 blocks/blockedBy 依赖字段",
"description": "每个任务可以声明它阻塞哪些任务(下游依赖)以及它被哪些任务阻塞(上游依赖)。Agent 不会开始有未解决 blockedBy 依赖的任务。这对多代理协调至关重要:当 Agent A 在编写数据库 schema、Agent B 需要写查询时,Agent B 的任务被 Agent A 的任务阻塞。没有依赖关系,两个 agent 可能同时开始,而 Agent B 会针对一个尚不存在的 schema 工作。"
},
"ja": {
"title": "タスクに blocks/blockedBy 依存関係フィールド",
        "description": "各タスクは、自分がブロックするタスク(下流の依存先)と、自分をブロックするタスク(上流の依存元)を宣言できます。エージェントは未解決の blockedBy 依存があるタスクを開始しません。これはマルチエージェント連携に不可欠です:エージェント A がデータベーススキーマを書いていてエージェント B がそれに対するクエリを書く必要がある場合、B のタスクは A のタスクにブロックされます。依存関係がなければ両エージェントが同時に開始し、B はまだ存在しないスキーマに対して作業することになります。"
}
},
{
"id": "task-default-todo-coexistence",
"title": "Task as Course Default, Todo Still Useful",
"description": "TaskManager extends the Todo mental model and becomes the default workflow from s07 onward in this course. Both track work items with statuses, but TaskManager adds file persistence (survives crashes), dependency tracking (blocks/blockedBy), ownership fields, and multi-process coordination. Todo remains useful for short, linear, one-shot tracking where heavyweight coordination is unnecessary.",
"alternatives": "Using only Todo keeps the model minimal but weak for long-running or collaborative work. Using only Task everywhere maximizes consistency but can feel heavy for tiny one-off tasks.",
"zh": {
"title": "Task 为课程主线,Todo 仍有适用场景",
"description": "TaskManager 延续了 Todo 的心智模型,并在本课程 s07 之后成为默认主线。两者都管理带状态的任务项,但 TaskManager 增加了文件持久化(崩溃后可恢复)、依赖追踪(blocks/blockedBy)、owner 字段与多进程协作能力。Todo 仍适合短、线性、一次性的轻量跟踪。"
},
"ja": {
"title": "Task を主線にしつつ Todo も併存",
"description": "TaskManager は Todo のメンタルモデルを拡張し、本コースでは s07 以降のデフォルトになる。どちらもステータス付き作業項目を扱うが、TaskManager にはファイル永続化(クラッシュ耐性)、依存関係追跡(blocks/blockedBy)、owner、マルチプロセス協調がある。Todo は短く直線的な単発作業では引き続き有効。"
}
},
{
"id": "task-write-discipline",
"title": "Durability Needs Write Discipline",
"description": "File persistence reduces context loss, but it does not remove concurrent-write risks by itself. Before writing task state, reload the JSON, validate expected status/dependency fields, and then save atomically. This prevents one agent from silently overwriting another agent's transition.",
"alternatives": "Blind overwrite writes are simpler but can corrupt coordination state under parallel execution. A database with optimistic locking would enforce stronger safety, but the course keeps file-based state for zero-dependency teaching.",
"zh": {
"title": "持久化仍需要写入纪律",
"description": "文件持久化能降低上下文丢失,但不会自动消除并发写入风险。写任务状态前应先重读 JSON、校验 `status/blockedBy` 是否符合预期,再原子写回,避免不同 agent 悄悄覆盖彼此状态。"
},
"ja": {
"title": "耐久性には書き込み規律が必要",
"description": "ファイル永続化だけでは並行書き込み競合は防げない。更新前に JSON を再読込し、`status/blockedBy` を検証して原子的に保存することで、他エージェントの遷移上書きを防ぐ。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s08.json
================================================
{
"version": "s08",
"decisions": [
{
"id": "notification-bus",
"title": "threading.Queue as the Notification Bus",
"description": "Background task results are delivered via a threading.Queue instead of direct callbacks. The background thread puts a notification on the queue when its work completes. The main agent loop polls the queue before each LLM call. This decoupling is important: the background thread doesn't need to know anything about the main loop's state or timing. It just drops a message on the queue and moves on. The main loop picks it up at its own pace -- never mid-API-call, never mid-tool-execution. No race conditions, no callback hell.",
"alternatives": "Direct callbacks (background thread calls a function in the main thread) would deliver results faster but create thread-safety issues -- the callback might fire while the main thread is in the middle of building a request. Event-driven systems (asyncio, event emitters) work but add complexity. A queue is the simplest thread-safe communication primitive.",
"zh": {
"title": "用 threading.Queue 作为通知总线",
"description": "后台任务结果通过 threading.Queue 传递,而非直接回调。后台线程在工作完成时向队列放入通知,主 agent 循环在每次 LLM 调用前轮询队列。这种解耦很重要:后台线程无需了解主循环的状态或时序,只需往队列放入消息然后继续。主循环按自己的节奏取出消息——永远不会在 API 调用中途或工具执行中途。没有竞争条件,没有回调地狱。"
},
"ja": {
"title": "threading.Queue を通知バスとして使用",
"description": "バックグラウンドタスクの結果は直接コールバックではなく threading.Queue を通じて配信されます。バックグラウンドスレッドは作業完了時にキューに通知を投入します。メインのエージェントループは各 LLM 呼び出しの前にキューをポーリングします。この疎結合が重要です:バックグラウンドスレッドはメインループの状態やタイミングを一切知る必要がありません。キューにメッセージを入れて先に進むだけです。メインループは自分のペースで取り出します――API 呼び出しの途中でもツール実行の途中でもありません。レースコンディションもコールバック地獄もありません。"
}
},
{
"id": "daemon-threads",
"title": "Background Tasks Run as Daemon Threads",
"description": "Background task threads are created with daemon=True. In Python, daemon threads are killed automatically when the main thread exits. This prevents a common problem: if the main agent completes its work and exits, but a background thread is still running (waiting on a long API call, stuck in a loop), the process would hang indefinitely. With daemon threads, exit is clean -- the main thread finishes, all daemon threads die, process exits. No zombie processes, no cleanup code needed.",
"alternatives": "Non-daemon threads with explicit cleanup (join with timeout, then terminate) give more control over shutdown but require careful lifecycle management. Process-based parallelism (multiprocessing) provides stronger isolation but higher overhead. Daemon threads are the pragmatic choice: minimal code, correct behavior in the common case.",
"zh": {
"title": "后台任务以守护线程运行",
"description": "后台任务线程以 daemon=True 创建。在 Python 中,守护线程在主线程退出时自动被终止。这防止了一个常见问题:如果主 agent 完成工作并退出,但后台线程仍在运行(等待一个长时间 API 调用或陷入循环),进程会无限挂起。使用守护线程,退出是干净的——主线程结束,所有守护线程自动终止,进程退出。没有僵尸进程,不需要清理代码。"
},
"ja": {
"title": "バックグラウンドタスクはデーモンスレッドとして実行",
"description": "バックグラウンドタスクのスレッドは daemon=True で作成されます。Python ではデーモンスレッドはメインスレッドの終了時に自動的に終了されます。これにより一般的な問題を防ぎます:メインエージェントが作業を完了して終了しても、バックグラウンドスレッドがまだ実行中(長い API 呼び出しを待機、ループに陥っている)だとプロセスが無限にハングします。デーモンスレッドならクリーンに終了できます――メインスレッドが終了すると全デーモンスレッドが自動終了し、プロセスが終了します。ゾンビプロセスもクリーンアップコードも不要です。"
}
},
{
"id": "attachment-format",
"title": "Structured Notification Format with Type Tags",
"description": "Notifications from background tasks use a structured format: {\"type\": \"attachment\", \"attachment\": {status, result, ...}} instead of plain text strings. The type tag lets the main loop handle different notification types differently: an 'attachment' might be injected into the conversation as a tool_result, while a 'status_update' might just update a progress indicator. Machine-readable notifications also enable programmatic filtering (show only errors, suppress progress updates) and UI rendering (display status as a progress bar, not raw text).",
"alternatives": "Plain text notifications are simpler but lose structure. The main loop would have to parse free-form text to determine what happened, which is fragile. A class hierarchy (StatusNotification, ResultNotification, ErrorNotification) is more Pythonic but less portable -- JSON structures work the same way regardless of language or serialization format.",
"zh": {
"title": "带类型标签的结构化通知格式",
"description": "后台任务的通知使用结构化格式:{\"type\": \"attachment\", \"attachment\": {status, result, ...}},而非纯文本字符串。类型标签让主循环可以区别处理不同通知类型:attachment 可能作为 tool_result 注入对话,而 status_update 可能只更新进度指示器。机器可读的通知还支持程序化过滤(只显示错误、抑制进度更新)和 UI 渲染(将状态显示为进度条而非原始文本)。"
},
"ja": {
"title": "型タグ付き構造化通知フォーマット",
"description": "バックグラウンドタスクからの通知は構造化フォーマットを使用します:プレーンテキストではなく {\"type\": \"attachment\", \"attachment\": {status, result, ...}} です。型タグによりメインループは異なる通知タイプを異なる方法で処理できます:attachment は会話に tool_result として注入され、status_update は進捗インジケーターの更新のみを行うかもしれません。機械可読な通知はプログラム的なフィルタリング(エラーのみ表示、進捗更新の抑制)や UI レンダリング(ステータスを生テキストではなくプログレスバーとして表示)も可能にします。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s09.json
================================================
{
"version": "s09",
"decisions": [
{
"id": "teammate-vs-subagent",
"title": "Persistent Teammates vs One-Shot Subagents",
"description": "In s04, subagents are ephemeral: spawn, do one task, return result, die. Their knowledge dies with them. In s09, teammates are persistent threads with identity (name, role) and config files. A teammate can complete task A, then be assigned task B, carrying forward everything it learned. Persistent teammates accumulate project knowledge, understand established patterns, and don't need to re-read the same files for every task.",
"alternatives": "One-shot subagents (s04 style) are simpler and provide perfect context isolation -- no risk of one task's context polluting another. But the re-learning cost is high: every new task starts from zero. A middle ground (subagents with shared memory/knowledge base) was considered but adds complexity without the full benefit of persistent identity and state.",
"zh": {
"title": "持久化队友 vs 一次性子智能体",
"description": "在 s04 中,子智能体是临时的:创建、执行一个任务、返回结果、销毁。它们的知识随之消亡。在 s09 中,队友是具有身份(名称、角色)和配置文件的持久化线程。队友可以完成任务 A,然后被分配任务 B,并携带之前学到的所有知识。持久化队友积累项目知识,理解已建立的模式,不需要为每个任务重新阅读相同的文件。"
},
"ja": {
"title": "永続的なチームメイト vs 使い捨てサブエージェント",
"description": "s04 ではサブエージェントは一時的です:生成、1つのタスクを実行、結果を返却、消滅。その知識も一緒に消えます。s09 ではチームメイトはアイデンティティ(名前、役割)と設定ファイルを持つ永続的なスレッドです。チームメイトはタスク A を完了した後、学んだ全てを引き継いでタスク B に割り当てられます。永続的なチームメイトはプロジェクトの知識を蓄積し、確立されたパターンを理解し、タスクごとに同じファイルを再読する必要がありません。"
}
},
{
"id": "file-based-team-config",
"title": "Team Config Persisted to .teams/{name}/config.json",
"description": "Team structure (member names, roles, agent IDs) is stored in a JSON config file, not in any agent's memory. Any agent can discover its teammates by reading the config file -- no need for a discovery service or shared memory. If an agent crashes and restarts, it reads the config to find out who else is on the team. This is consistent with the s07 philosophy: the filesystem is the coordination layer.",
"alternatives": "In-memory team registries are faster but don't survive process restarts and require a central process to maintain. Service discovery (like DNS or a discovery server) is more robust at scale but overkill for a local multi-agent system. File-based config is the simplest approach that works across independent processes.",
"zh": {
"title": "团队配置持久化到 .teams/{name}/config.json",
"description": "团队结构(成员名称、角色、agent ID)存储在 JSON 配置文件中,而非任何 agent 的内存中。任何 agent 都可以通过读取配置文件发现队友——无需发现服务或共享内存。如果 agent 崩溃并重启,它读取配置即可知道团队中还有谁。这与 s07 的理念一致:文件系统就是协调层。配置文件人类可读,便于手动添加或移除团队成员、调试团队配置问题。"
},
"ja": {
"title": "チーム設定を .teams/{name}/config.json に永続化",
"description": "チーム構成(メンバー名、役割、エージェント ID)はエージェントのメモリではなく JSON 設定ファイルに保存されます。どのエージェントも設定ファイルを読むことでチームメイトを発見できます――ディスカバリーサービスや共有メモリは不要です。エージェントがクラッシュして再起動した場合、設定を読んで他のチームメンバーを把握します。これは s07 の思想と一貫しています:ファイルシステムが連携レイヤーです。"
}
},
{
"id": "tool-filtering-by-role",
"title": "Teammates Get Subset of Tools, Lead Gets All",
"description": "The team lead receives ALL_TOOLS (including spawn, send, read_inbox, etc.) while teammates receive TEAMMATE_TOOLS (a reduced set focused on task execution). This enforces a clear separation of concerns: teammates focus on doing work (coding, testing, researching), while the lead focuses on coordination (creating tasks, assigning work, managing communication). Giving teammates coordination tools would let them create their own sub-teams or reassign tasks, undermining the lead's ability to maintain a coherent plan.",
"alternatives": "Giving all agents identical tools is simpler and more egalitarian, but in practice leads to coordination chaos -- multiple agents trying to manage each other, creating conflicting task assignments. Static role-based filtering is predictable and easy to reason about.",
"zh": {
"title": "队友获得工具子集,组长获得全部工具",
"description": "团队组长获得 ALL_TOOLS(包括 spawn、send、read_inbox 等),而队友获得 TEAMMATE_TOOLS(专注于任务执行的精简工具集)。这强制了清晰的职责分离:队友专注于做事(编码、测试、研究),组长专注于协调(创建任务、分配工作、管理沟通)。给队友协调工具会让他们创建自己的子团队或重新分配任务,破坏组长维持连贯计划的能力。"
},
"ja": {
"title": "チームメイトはツールのサブセット、リーダーは全ツール",
"description": "チームリーダーは ALL_TOOLS(spawn、send、read_inbox など含む)を受け取り、チームメイトは TEAMMATE_TOOLS(タスク実行に特化した縮小セット)を受け取ります。これにより明確な関心の分離が強制されます:チームメイトは作業(コーディング、テスト、調査)に集中し、リーダーは調整(タスク作成、作業割り当て、コミュニケーション管理)に集中します。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s10.json
================================================
{
"version": "s10",
"decisions": [
{
"id": "jsonl-inbox",
"title": "JSONL Inbox Files Instead of Shared Memory",
"description": "Each teammate has its own inbox file (a JSONL file in the team directory). Sending a message means appending a JSON line to the recipient's inbox file. Reading messages means reading the inbox file and tracking which line was last read. JSONL is append-only by nature, which means concurrent writers don't corrupt each other's data (appends to different file positions). This works across processes without any shared memory, mutex, or IPC mechanism. It's also crash-safe: if the writer crashes mid-append, the worst case is one partial line that the reader can skip.",
"alternatives": "Shared memory (Python multiprocessing.Queue) would be faster but doesn't work if agents are separate processes launched independently. A message broker (Redis, RabbitMQ) provides robust pub/sub but adds infrastructure dependencies. Unix domain sockets would work but are harder to debug (no human-readable message log). JSONL files are the simplest approach that provides persistence, cross-process communication, and debuggability.",
"zh": {
"title": "JSONL 收件箱文件而非共享内存",
"description": "每个队友都有自己的收件箱文件(团队目录中的 JSONL 文件)。发送消息意味着向接收者的收件箱文件追加一行 JSON。读取消息意味着读取收件箱文件并追踪上次读到的行。JSONL 天然是仅追加的,这意味着并发写入不会破坏彼此的数据(追加到不同的文件位置)。这在无需共享内存、互斥锁或 IPC 机制的情况下跨进程工作。它也是崩溃安全的:如果写入者在追加中途崩溃,最坏情况是一行不完整的数据,读取者可以跳过。"
},
"ja": {
"title": "共有メモリではなく JSONL インボックスファイル",
"description": "各チームメイトはチームディレクトリ内に独自のインボックスファイル(JSONL ファイル)を持ちます。メッセージの送信は受信者のインボックスファイルに JSON 行を追記することです。メッセージの読み取りはインボックスファイルを読んで最後に読んだ行を追跡することです。JSONL は本質的に追記専用で、並行ライターが互いのデータを破壊しません(異なるファイル位置への追記)。共有メモリ、ミューテックス、IPC メカニズムなしにプロセス間で動作します。"
}
},
{
"id": "five-message-types",
"title": "Exactly Five Message Types Cover All Coordination Patterns",
"description": "The messaging system supports exactly five types: (1) 'message' for point-to-point communication between two agents, (2) 'broadcast' for team-wide announcements, (3) 'shutdown_request' for graceful termination, (4) 'shutdown_response' for acknowledging shutdown, (5) 'plan_approval_response' for the lead to approve or reject a teammate's plan. These five types map to the fundamental coordination patterns: direct communication, broadcast, lifecycle management, and approval workflows.",
"alternatives": "A single generic message type with metadata fields would be more flexible but makes it harder to enforce protocol correctness. Many more types (10+) would provide finer-grained semantics but increase the model's decision burden. Five types is the sweet spot where every type has a clear, distinct purpose.",
"zh": {
"title": "恰好五种消息类型覆盖所有协调模式",
"description": "消息系统恰好支持五种类型:(1) message 用于两个 agent 间的点对点通信;(2) broadcast 用于全团队公告;(3) shutdown_request 用于优雅终止;(4) shutdown_response 用于确认终止;(5) plan_approval_response 用于组长批准或拒绝队友的计划。这五种类型映射到基本协调模式:直接通信、广播、生命周期管理和审批流程。"
},
"ja": {
"title": "正確に5つのメッセージタイプで全連携パターンをカバー",
"description": "メッセージングシステムは正確に5つのタイプをサポートします:(1) message は2つのエージェント間のポイントツーポイント通信、(2) broadcast はチーム全体への通知、(3) shutdown_request はグレースフルな終了要求、(4) shutdown_response はシャットダウンの確認応答、(5) plan_approval_response はリーダーによるチームメイトの計画の承認・却下。"
}
},
{
"id": "inbox-before-api-call",
"title": "Check Inbox Before Every LLM Call",
"description": "Teammates check their inbox file at the top of every agent loop iteration, before calling the LLM API. This ensures maximum responsiveness to incoming messages: a shutdown request is seen within one loop iteration (typically seconds), not after the current task completes (potentially minutes). The inbox check is cheap (read a small file, check if new lines exist) compared to the LLM call (seconds of latency, thousands of tokens). This placement also means incoming messages can influence the next LLM call -- a message saying 'stop working on X, switch to Y' takes effect immediately.",
"alternatives": "Checking inbox after each tool execution would be more responsive but adds overhead to every tool call, which is more frequent than LLM calls. A separate watcher thread could monitor the inbox continuously but adds threading complexity. Checking once per LLM call is the pragmatic sweet spot: responsive enough for coordination, cheap enough to not impact performance.",
"zh": {
"title": "每次 LLM 调用前检查收件箱",
"description": "队友在每次 agent 循环迭代的顶部、调用 LLM API 之前检查收件箱文件。这确保了对传入消息的最大响应性:一个终止请求会在一个循环迭代内被看到(通常几秒钟),而非在当前任务完成后(可能数分钟)。收件箱检查成本很低(读取小文件,检查是否有新行),相比 LLM 调用(秒级延迟,数千 token)微不足道。这个位置还意味着传入消息可以影响下一次 LLM 调用——一条'停止 X,转去做 Y'的消息会立即生效。"
},
"ja": {
"title": "毎回の LLM 呼び出し前にインボックスを確認",
"description": "チームメイトはエージェントループの各イテレーションの冒頭、LLM API を呼び出す前にインボックスファイルを確認します。これにより受信メッセージへの応答性を最大化します:シャットダウンリクエストは1ループイテレーション以内(通常数秒)で確認され、現在のタスク完了後(数分かかる可能性)ではありません。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s11.json
================================================
{
"version": "s11",
"decisions": [
{
"id": "polling-not-events",
"title": "Polling for Unclaimed Tasks Instead of Event-Driven Notification",
"description": "Autonomous teammates poll the shared task board every ~1 second to find unclaimed tasks, rather than waiting for event-driven notifications. Polling is fundamentally simpler than pub/sub: there's no subscription management, no event routing, no missed-event bugs. With file-based persistence, polling is just 'read the directory listing' -- a cheap operation that works regardless of how many agents are running. The 1-second interval balances responsiveness (new tasks are discovered quickly) against filesystem overhead (not hammering the disk with reads).",
"alternatives": "Event-driven notification (file watchers via inotify/fsevents, or a pub/sub channel) would reduce latency from seconds to milliseconds. But file watchers are platform-specific and unreliable across network filesystems. A message broker would work but adds infrastructure. For a system where tasks take minutes to complete, discovering new tasks in 1 second instead of 10 milliseconds makes no practical difference.",
"zh": {
"title": "轮询未认领任务而非事件驱动通知",
"description": "自主队友每隔约 1 秒轮询共享任务板以寻找未认领的任务,而非等待事件驱动的通知。轮询从根本上比发布/订阅更简单:没有订阅管理、没有事件路由、没有事件丢失的 bug。在基于文件的持久化下,轮询就是'读取目录列表'——一个低成本操作,无论有多少 agent 在运行都能正常工作。1 秒的间隔平衡了响应性(新任务被快速发现)和文件系统开销(不会过度读取磁盘)。"
},
"ja": {
"title": "イベント駆動通知ではなくポーリングで未割り当てタスクを発見",
"description": "自律的なチームメイトはイベント駆動の通知を待つのではなく、約1秒ごとに共有タスクボードをポーリングして未割り当てタスクを探します。ポーリングはパブ/サブより根本的にシンプルです:サブスクリプション管理、イベントルーティング、イベント欠落バグがありません。ファイルベースの永続化では、ポーリングは「ディレクトリ一覧を読む」だけで、実行中のエージェント数に関係なく動作する安価な操作です。"
}
},
{
"id": "idle-timeout",
"title": "60-Second Idle Timeout Before Self-Termination",
"description": "When an autonomous teammate has no tasks to work on and no messages in its inbox, it waits up to 60 seconds before giving up and shutting down. This prevents zombie teammates that wait forever for work that never comes -- a real problem when the lead forgets to send a shutdown request, or when all remaining tasks are blocked on external events. The 60-second window is long enough that a brief gap between task completions and new task creation won't cause premature shutdown, but short enough that unused teammates don't waste resources.",
"alternatives": "No timeout (wait forever) risks zombie processes. A very short timeout (5s) causes premature exits when the lead is simply thinking or typing. A heartbeat system (lead periodically pings teammates to keep them alive) works but adds protocol complexity. The 60-second fixed timeout is a good default that balances false-positive exits against resource waste.",
"zh": {
"title": "空闲 60 秒后自动终止",
"description": "当自主队友没有任务可做且收件箱中没有消息时,它最多等待 60 秒后放弃并关闭。这防止了永远等待不会到来的工作的僵尸队友——这在组长忘记发送关闭请求、或所有剩余任务都被外部事件阻塞时是真实存在的问题。60 秒窗口足够长,不会因为任务完成到新任务创建之间的短暂间隔而导致过早关闭;又足够短,不会让闲置队友浪费资源。"
},
"ja": {
"title": "60秒のアイドルタイムアウトで自動終了",
"description": "自律的なチームメイトが作業するタスクもインボックスのメッセージもない場合、最大60秒待ってから諦めてシャットダウンします。これにより永遠に来ない仕事を待ち続けるゾンビチームメイトを防ぎます。60秒のウィンドウはタスク完了から新タスク作成までの短い間隔で早期シャットダウンが起きない十分な長さであり、かつ未使用のチームメイトがリソースを浪費しない十分な短さです。"
}
},
{
"id": "identity-after-compression",
"title": "Re-Inject Teammate Identity After Context Compression",
"description": "When auto_compact compresses the conversation, the resulting summary loses crucial metadata: the teammate's name, which team it belongs to, and its agent_id. Without this information, the teammate can't claim tasks (tasks are owned by name), can't check its inbox (inbox files are keyed by agent_id), and can't identify itself in messages. So after every auto_compact, the system re-injects a structured identity block into the conversation: 'You are [name] on team [team], your agent_id is [id], your inbox is at [path].' This is the minimum context needed for the teammate to remain functional after memory loss.",
"alternatives": "Putting identity in the system prompt (which survives compression) would avoid this problem, but violates the cache-friendly static-system-prompt design from s05. Embedding identity in the summary prompt ('when summarizing, always include your name and team') is unreliable -- the LLM might omit it. Explicit post-compression injection is deterministic and guaranteed to work.",
"zh": {
"title": "上下文压缩后重新注入队友身份",
"description": "自动压缩对话时,生成的摘要会丢失关键元数据:队友的名称、所属团队和 agent_id。没有这些信息,队友无法认领任务(任务按名称归属)、无法检查收件箱(收件箱文件以 agent_id 为键)、也无法在消息中表明身份。因此每次自动压缩后,系统会向对话中重新注入一个结构化的身份块:'你是 [team] 团队的 [name],你的 agent_id 是 [id],你的收件箱在 [path]。'这是队友在记忆丢失后保持功能所需的最小上下文。"
},
"ja": {
"title": "コンテキスト圧縮後にチームメイトのアイデンティティを再注入",
"description": "自動コンパクトが会話を圧縮すると、生成された要約は重要なメタデータを失います:チームメイトの名前、所属チーム、agent_id。この情報がなければチームメイトはタスクを申告できず(タスクは名前で所有)、インボックスを確認できず(インボックスファイルは agent_id をキーとする)、メッセージで自分を識別できません。そのため自動コンパクトの後、システムは構造化されたアイデンティティブロックを会話に再注入します。これはメモリ喪失後もチームメイトが機能し続けるために必要な最小限のコンテキストです。"
}
}
]
}
================================================
FILE: web/src/data/annotations/s12.json
================================================
{
"version": "s12",
"decisions": [
{
"id": "shared-board-isolated-lanes",
"title": "Shared Task Board + Isolated Execution Lanes",
"description": "The task board remains shared and centralized in `.tasks/`, while file edits happen in per-task worktree directories. This separation preserves global visibility (who owns what, what is done) without forcing everyone to edit inside one mutable directory. Coordination stays simple because there is one board, and execution stays safe because each lane is isolated.",
"alternatives": "A single shared workspace is simpler but causes edit collisions and mixed git state. Fully independent task stores per lane avoid collisions but lose team-level visibility and make planning harder.",
"zh": {
"title": "共享任务板 + 隔离执行通道",
"description": "任务板继续集中在 `.tasks/`,而文件改动发生在按任务划分的 worktree 目录中。这样既保留了全局可见性(谁在做什么、完成到哪),又避免所有人同时写同一目录导致冲突。协调层简单(一个任务板),执行层安全(多条隔离通道)。"
},
"ja": {
"title": "共有タスクボード + 分離実行レーン",
"description": "タスクボードは `.tasks/` に集約しつつ、実際の編集はタスクごとの worktree ディレクトリで行う。これにより全体の可視性(担当と進捗)を維持しながら、単一ディレクトリでの衝突を回避できる。調整は1つのボードで単純化され、実行はレーン分離で安全になる。"
}
},
{
"id": "index-file-lifecycle",
"title": "Explicit Worktree Lifecycle Index",
"description": "`.worktrees/index.json` records each worktree's name, path, branch, task_id, and status. This makes lifecycle state inspectable and recoverable even after context compression or process restarts. The index also provides a deterministic source for list/status/remove operations.",
"alternatives": "Relying only on `git worktree list` removes local bookkeeping but loses task binding metadata and custom lifecycle states. Keeping all state only in memory is simpler in code but breaks recoverability.",
"zh": {
"title": "显式 worktree 生命周期索引",
"description": "`.worktrees/index.json` 记录每个 worktree 的名称、路径、分支、task_id 与状态。即使上下文压缩或进程重启,这些生命周期状态仍可检查和恢复。它也为 list/status/remove 提供了确定性的本地数据源。"
},
"ja": {
"title": "明示的な worktree ライフサイクル索引",
"description": "`.worktrees/index.json` に name/path/branch/task_id/status を記録することで、コンテキスト圧縮やプロセス再起動後も状態を追跡できる。list/status/remove の挙動もこの索引を基準に決定できる。"
}
},
{
"id": "lane-cwd-routing-and-reentry-guard",
"title": "Lane-Scoped CWD Routing + Re-entry Guard",
"description": "Commands are routed to a worktree's directory via `worktree_run(name, command)` using the `cwd` parameter. A re-entry guard prevents accidentally running inside an already-active worktree context, keeping lifecycle ownership unambiguous.",
"alternatives": "Global cwd mutation is easy to implement but can leak context across parallel work. Allowing silent re-entry makes lifecycle ownership ambiguous and complicates teardown behavior.",
"zh": {
"title": "按通道 cwd 路由 + 禁止重入",
"description": "命令通过 `worktree_run(name, command)` 使用 `cwd` 参数路由到 worktree 目录。重入保护避免了在已激活的 worktree 上下文中意外二次进入,保持生命周期归属清晰。"
},
"ja": {
"title": "レーン単位 cwd ルーティング + 再入防止",
"description": "`worktree_run(name, command)` で `cwd` パラメータを使いコマンドを worktree ディレクトリへ転送する。再入ガードにより active な worktree への二重入場を防ぎ、ライフサイクルの帰属を明確に保つ。"
}
},
{
"id": "event-stream-observability",
"title": "Append-Only Lifecycle Event Stream",
"description": "Lifecycle events are appended to `.worktrees/events.jsonl` (`worktree.create.*`, `worktree.remove.*`, `task.completed`). This turns hidden transitions into queryable records and makes failures explicit (`*.failed`) instead of silent.",
"alternatives": "Relying only on console logs is lighter but fragile during long sessions and hard to audit. A full event bus infrastructure is powerful but heavier than needed for this teaching baseline.",
"zh": {
"title": "追加式生命周期事件流",
"description": "生命周期事件写入 `.worktrees/events.jsonl`(如 `worktree.create.*`、`worktree.remove.*`、`task.completed`)。这样状态迁移可查询、可追踪,失败也会以 `*.failed` 显式暴露,而不是静默丢失。"
},
"ja": {
"title": "追記型ライフサイクルイベント",
"description": "ライフサイクルイベントを `.worktrees/events.jsonl` に追記する(`worktree.create.*`、`worktree.remove.*`、`task.completed` など)。遷移が可観測になり、失敗も `*.failed` として明示できる。"
}
},
{
"id": "task-worktree-closeout",
"title": "Close Task and Workspace Together",
"description": "`worktree_remove(..., complete_task=true)` allows a single closeout step: remove the isolated directory and mark the bound task completed. Closeout remains an explicit tool-driven transition (`worktree_keep` / `worktree_remove`) rather than hidden automatic cleanup. This reduces dangling state where a task says done but its temporary lane remains active (or the reverse).",
"alternatives": "Keeping closeout fully manual gives flexibility but increases operational drift. Fully automatic removal on every completion risks deleting a workspace before final review.",
"zh": {
"title": "任务与工作区一起收尾",
"description": "`worktree_remove(..., complete_task=true)` 允许在一个动作里完成收尾:删除隔离目录并把绑定任务标记为 completed。收尾保持为显式工具驱动迁移(`worktree_keep` / `worktree_remove`),而不是隐藏的自动清理。这样可减少状态悬挂(任务已完成但临时工作区仍活跃,或反过来)。"
},
"ja": {
"title": "タスクとワークスペースを同時にクローズ",
"description": "`worktree_remove(..., complete_task=true)` により、分離ディレクトリ削除とタスク完了更新を1ステップで実行できる。クローズ処理は `worktree_keep` / `worktree_remove` の明示ツール遷移として扱い、暗黙の自動清掃にはしない。"
}
},
{
"id": "event-stream-side-channel",
"title": "Event Stream Is Observability Side-Channel",
"description": "Lifecycle events improve auditability, but the source of truth remains task/worktree state files. Events should be read as transition traces, not as a replacement state machine.",
"alternatives": "Using logs alone hides structured transitions; using events as the only state source risks drift when replay/repair semantics are undefined.",
"zh": {
"title": "事件流是观测旁路,不是状态机替身",
"description": "生命周期事件提升可审计性,但真实状态源仍是任务/工作区状态文件。事件更适合做迁移轨迹,而不是替代主状态机。"
},
"ja": {
"title": "イベントは観測サイドチャネルであり状態機械の代替ではない",
"description": "ライフサイクルイベントは監査性を高めるが、真の状態源は task/worktree 状態ファイルのまま。イベントは遷移トレースとして扱い、主状態機械の代替にしない。"
}
}
]
}
================================================
FILE: web/src/data/execution-flows.ts
================================================
import type { FlowNode, FlowEdge } from "@/types/agent-data";
/**
 * One renderable flowchart: absolutely-positioned nodes plus directed,
 * optionally-labeled edges connecting them by node id.
 */
export interface FlowDefinition {
  nodes: FlowNode[];
  edges: FlowEdge[];
}
// Shared canvas geometry for every flow diagram below.
const FLOW_WIDTH = 600; // total drawable width of the diagram
const COL_CENTER = FLOW_WIDTH / 2; // main spine (start / LLM / decision nodes)
const COL_LEFT = 140; // tool-execution branch column
const COL_RIGHT = FLOW_WIDTH - 140; // output / side-channel column (mirrors COL_LEFT)
export const EXECUTION_FLOWS: Record = {
s01: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
{ id: "bash", label: "Execute Bash", type: "subprocess", x: COL_LEFT, y: 280 },
{ id: "append", label: "Append Result", type: "process", x: COL_LEFT, y: 360 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
],
edges: [
{ from: "start", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "bash", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "bash", to: "append" },
{ from: "append", to: "llm" },
],
},
s02: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
{ id: "dispatch", label: "Tool Dispatch", type: "process", x: COL_LEFT, y: 280 },
{ id: "exec", label: "bash / read / write / edit", type: "subprocess", x: COL_LEFT, y: 360 },
{ id: "append", label: "Append Result", type: "process", x: COL_LEFT, y: 440 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
],
edges: [
{ from: "start", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "dispatch", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "dispatch", to: "exec" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
],
},
s03: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "todo", label: "Create Todos", type: "process", x: COL_CENTER, y: 100 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 180 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 260 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT, y: 340 },
{ id: "append", label: "Append Result", type: "process", x: COL_LEFT, y: 410 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 340 },
],
edges: [
{ from: "start", to: "todo" },
{ from: "todo", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "exec", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
],
},
s04: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
{ id: "is_task", label: "task tool?", type: "decision", x: COL_LEFT, y: 280 },
{ id: "spawn", label: "Spawn Subagent\n(fresh messages[])", type: "subprocess", x: 60, y: 380 },
{ id: "sub_loop", label: "Subagent Loop", type: "process", x: 60, y: 460 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 380 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 540 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
],
edges: [
{ from: "start", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "is_task", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "is_task", to: "spawn", label: "task" },
{ from: "is_task", to: "exec", label: "other" },
{ from: "spawn", to: "sub_loop" },
{ from: "sub_loop", to: "append" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
],
},
s05: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
{ id: "is_skill", label: "load_skill?", type: "decision", x: COL_LEFT, y: 280 },
{ id: "load", label: "Read SKILL.md", type: "subprocess", x: 60, y: 370 },
{ id: "inject", label: "Inject via\ntool_result", type: "process", x: 60, y: 450 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 370 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 530 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
],
edges: [
{ from: "start", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "is_skill", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "is_skill", to: "load", label: "skill" },
{ from: "is_skill", to: "exec", label: "other" },
{ from: "load", to: "inject" },
{ from: "inject", to: "append" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
],
},
s06: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "compress_check", label: "Over token\nlimit?", type: "decision", x: COL_CENTER, y: 110 },
{ id: "compress", label: "Compress Context", type: "subprocess", x: COL_RIGHT, y: 110 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 200 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 280 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT, y: 360 },
{ id: "append", label: "Append Result", type: "process", x: COL_LEFT, y: 430 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 360 },
],
edges: [
{ from: "start", to: "compress_check" },
{ from: "compress_check", to: "compress", label: "yes" },
{ from: "compress_check", to: "llm", label: "no" },
{ from: "compress", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "exec", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "exec", to: "append" },
{ from: "append", to: "compress_check" },
],
},
s07: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
{ id: "is_task", label: "task_manager?", type: "decision", x: COL_LEFT, y: 280 },
{ id: "crud", label: "CRUD Task\n(file-based)", type: "subprocess", x: 60, y: 370 },
{ id: "dep_check", label: "Check\nDependencies", type: "process", x: 60, y: 450 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 370 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 530 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
],
edges: [
{ from: "start", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "is_task", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "is_task", to: "crud", label: "task" },
{ from: "is_task", to: "exec", label: "other" },
{ from: "crud", to: "dep_check" },
{ from: "dep_check", to: "append" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
],
},
s08: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
{ id: "bg_check", label: "Background?", type: "decision", x: COL_LEFT, y: 280 },
{ id: "bg_spawn", label: "Spawn Thread", type: "subprocess", x: 60, y: 370 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 370 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 450 },
{ id: "notify", label: "Notification\nQueue", type: "process", x: 60, y: 450 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
],
edges: [
{ from: "start", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "bg_check", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "bg_check", to: "bg_spawn", label: "bg" },
{ from: "bg_check", to: "exec", label: "fg" },
{ from: "bg_spawn", to: "notify" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
{ from: "notify", to: "llm" },
],
},
s09: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call\n(team lead)", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 200 },
{ id: "is_team", label: "Team tool?", type: "decision", x: COL_LEFT, y: 290 },
{ id: "spawn", label: "Spawn\nTeammate", type: "subprocess", x: 60, y: 390 },
{ id: "msg", label: "Send Message\n(JSONL inbox)", type: "subprocess", x: 60, y: 470 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 390 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 550 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 290 },
{ id: "teammate", label: "Teammate Agent\n(own loop)", type: "process", x: COL_RIGHT, y: 470 },
],
edges: [
{ from: "start", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "is_team", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "is_team", to: "spawn", label: "spawn" },
{ from: "is_team", to: "exec", label: "other" },
{ from: "spawn", to: "teammate" },
{ from: "spawn", to: "msg" },
{ from: "msg", to: "append" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
],
},
s10: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call\n(team lead)", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 200 },
{ id: "is_proto", label: "Protocol?", type: "decision", x: COL_LEFT, y: 290 },
{ id: "shutdown", label: "Shutdown\nRequest", type: "subprocess", x: 60, y: 390 },
{ id: "fsm", label: "FSM:\npending->approved", type: "process", x: 60, y: 470 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT + 80, y: 390 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 550 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 290 },
{ id: "teammate", label: "Teammate\nreceives request_id", type: "process", x: COL_RIGHT, y: 470 },
],
edges: [
{ from: "start", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "is_proto", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "is_proto", to: "shutdown", label: "shutdown" },
{ from: "is_proto", to: "exec", label: "other" },
{ from: "shutdown", to: "fsm" },
{ from: "fsm", to: "teammate" },
{ from: "teammate", to: "append" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
],
},
s11: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "inbox", label: "Check Inbox", type: "process", x: COL_CENTER, y: 100 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 180 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 260 },
{ id: "exec", label: "Execute Tool", type: "subprocess", x: COL_LEFT, y: 340 },
{ id: "append", label: "Append Result", type: "process", x: COL_LEFT, y: 410 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 340 },
{ id: "idle", label: "Idle Cycle", type: "process", x: COL_RIGHT, y: 420 },
{ id: "poll", label: "Poll Tasks\n+ Auto-Claim", type: "subprocess", x: COL_RIGHT, y: 500 },
],
edges: [
{ from: "start", to: "inbox" },
{ from: "inbox", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "exec", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "exec", to: "append" },
{ from: "append", to: "llm" },
{ from: "end", to: "idle" },
{ from: "idle", to: "poll" },
{ from: "poll", to: "inbox" },
],
},
s12: {
nodes: [
{ id: "start", label: "User Input", type: "start", x: COL_CENTER, y: 30 },
{ id: "llm", label: "LLM Call", type: "process", x: COL_CENTER, y: 110 },
{ id: "tool_check", label: "tool_use?", type: "decision", x: COL_CENTER, y: 190 },
{ id: "is_wt", label: "worktree tool?", type: "decision", x: COL_LEFT, y: 280 },
{ id: "task", label: "Task Board\\n(.tasks)", type: "process", x: 60, y: 360 },
{ id: "wt_create", label: "Allocate / Enter\\nWorktree", type: "subprocess", x: 60, y: 440 },
{ id: "wt_run", label: "Run in\\nIsolated Dir", type: "subprocess", x: COL_LEFT + 80, y: 360 },
{ id: "wt_close", label: "Closeout:\\nworktree_keep / remove", type: "process", x: COL_LEFT + 80, y: 440 },
{ id: "events", label: "Emit Lifecycle Events\\n(side-channel)", type: "process", x: COL_RIGHT, y: 420 },
{ id: "events_read", label: "Optional Read\\nworktree_events", type: "subprocess", x: COL_RIGHT, y: 520 },
{ id: "append", label: "Append Result", type: "process", x: COL_CENTER, y: 530 },
{ id: "end", label: "Output", type: "end", x: COL_RIGHT, y: 280 },
],
edges: [
{ from: "start", to: "llm" },
{ from: "llm", to: "tool_check" },
{ from: "tool_check", to: "is_wt", label: "yes" },
{ from: "tool_check", to: "end", label: "no" },
{ from: "is_wt", to: "task", label: "task ops" },
{ from: "is_wt", to: "wt_create", label: "create/bind" },
{ from: "is_wt", to: "wt_run", label: "run/status" },
{ from: "task", to: "wt_create", label: "allocate lane" },
{ from: "wt_create", to: "wt_run" },
{ from: "task", to: "append", label: "task result" },
{ from: "wt_create", to: "events", label: "emit create" },
{ from: "wt_create", to: "append", label: "create result" },
{ from: "wt_run", to: "wt_close" },
{ from: "wt_run", to: "append", label: "run/status result" },
{ from: "wt_close", to: "events", label: "emit closeout" },
{ from: "wt_close", to: "append", label: "closeout result" },
{ from: "events", to: "events_read", label: "optional query" },
{ from: "events_read", to: "append", label: "events result" },
{ from: "append", to: "llm" },
],
},
};
/**
 * Resolve the execution-flow diagram registered for a course version.
 *
 * @param version - Version key, e.g. `"s01"` or `"s12"`.
 * @returns The matching {@link FlowDefinition}, or `null` when no flow
 *          diagram is registered for that version.
 */
export function getFlowForVersion(version: string): FlowDefinition | null {
  const flow = EXECUTION_FLOWS[version];
  // Normalize a missing entry to an explicit null for callers.
  return flow === undefined ? null : flow;
}
================================================
FILE: web/src/data/generated/docs.json
================================================
[
{
"version": "s01",
"locale": "en",
"title": "s01: The Agent Loop",
"content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- one tool + one loop = an agent.\n\n## Problem\n\nA language model can reason about code, but it can't *touch* the real world -- can't read files, run tests, or check errors. Without a loop, every tool call requires you to manually copy-paste results back. You become the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\nOne exit condition controls the entire flow. The loop runs until the model stops calling tools.\n\n## How It Works\n\n1. User prompt becomes the first message.\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. Send messages + tool definitions to the LLM.\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. Append the assistant response. Check `stop_reason` -- if the model didn't call a tool, we're done.\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. Execute each tool call, collect results, append as a user message. 
Loop back to step 2.\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\nAssembled into one function:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nThat's the entire agent in under 30 lines. Everything else in this course layers on top -- without changing the loop.\n\n## What Changed\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n"
},
{
"version": "s02",
"locale": "en",
"title": "s02: Tool Use",
"content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Adding a tool means adding one handler\"* -- the loop stays the same; new tools register into the dispatch map.\n\n## Problem\n\nWith only `bash`, the agent shells out for everything. `cat` truncates unpredictably, `sed` fails on special characters, and every bash call is an unconstrained security surface. Dedicated tools like `read_file` and `write_file` let you enforce path sandboxing at the tool level.\n\nThe key insight: adding tools does not require changing the loop.\n\n## Solution\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## How It Works\n\n1. Each tool gets a handler function. Path sandboxing prevents workspace escape.\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. The dispatch map links tool names to handlers.\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. In the loop, look up the handler by name. 
The loop body itself is unchanged from s01.\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nAdd a tool = add a handler + add a schema entry. The loop never changes.\n\n## What Changed From s01\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n"
},
{
"version": "s03",
"locale": "en",
"title": "s03: TodoWrite",
"content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"An agent without a plan drifts\"* -- list the steps first, then execute.\n\n## Problem\n\nOn multi-step tasks, the model loses track. It repeats work, skips steps, or wanders off. Long conversations make this worse -- the system prompt fades as tool results fill the context. A 10-step refactoring might complete steps 1-3, then the model starts improvising because it forgot steps 4-10.\n\n## Solution\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## How It Works\n\n1. TodoManager stores items with statuses. Only one item can be `in_progress` at a time.\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. The `todo` tool goes into the dispatch map like any other tool.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. A nag reminder injects a nudge if the model goes 3+ rounds without calling `todo`.\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos. 
\",\n })\n```\n\nThe \"one in_progress at a time\" constraint forces sequential focus. The nag reminder creates accountability.\n\n## What Changed From s02\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n"
},
{
"version": "s04",
"locale": "en",
"title": "s04: Subagents",
"content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big tasks down; each subtask gets a clean context\"* -- subagents use independent messages[], keeping the main conversation clean.\n\n## Problem\n\nAs the agent works, its messages array grows. Every file read, every bash output stays in context permanently. \"What testing framework does this project use?\" might require reading 5 files, but the parent only needs the answer: \"pytest.\"\n\n## Solution\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## How It Works\n\n1. The parent gets a `task` tool. The child gets all base tools except `task` (no recursive spawning).\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. The subagent starts with `messages=[]` and runs its own loop. 
Only the final text returns to the parent.\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\nThe child's entire message history (possibly 30+ tool calls) is discarded. The parent receives a one-paragraph summary as a normal `tool_result`.\n\n## What Changed From s03\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n"
},
{
"version": "s05",
"locale": "en",
"title": "s05: Skills",
"content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Load knowledge when you need it, not upfront\"* -- inject via tool_result, not the system prompt.\n\n## Problem\n\nYou want the agent to follow domain-specific workflows: git conventions, testing patterns, code review checklists. Putting everything in the system prompt wastes tokens on unused skills. 10 skills at 2000 tokens each = 20,000 tokens, most of which are irrelevant to any given task.\n\n## Solution\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\nLayer 1: skill *names* in system prompt (cheap). Layer 2: full *body* via tool_result (on demand).\n\n## How It Works\n\n1. Each skill is a directory containing a `SKILL.md` with YAML frontmatter.\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. 
SkillLoader scans for `SKILL.md` files, uses the directory name as the skill identifier.\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n \"\n```\n\n3. Layer 1 goes into the system prompt. Layer 2 is just another tool handler.\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nThe model learns what skills exist (cheap) and loads them when relevant (expensive).\n\n## What Changed From s04\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n"
},
{
"version": "s06",
"locale": "en",
"title": "s06: Context Compact",
"content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"Context will fill up; you need a way to make room\"* -- three-layer compression strategy for infinite sessions.\n\n## Problem\n\nThe context window is finite. A single `read_file` on a 1000-line file costs ~4000 tokens. After reading 30 files and running 20 bash commands, you hit 100,000+ tokens. The agent cannot work on large codebases without compression.\n\n## Solution\n\nThree layers, increasing in aggressiveness:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## How It Works\n\n1. **Layer 1 -- micro_compact**: Before each LLM call, replace old tool results with placeholders.\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**Layer 2 -- auto_compact**: When tokens exceed threshold, save full transcript to disk, then ask the LLM to summarize.\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **Layer 3 -- manual compact**: The `compact` tool triggers the same summarization on demand.\n\n4. The loop integrates all three:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nTranscripts preserve full history on disk. Nothing is truly lost -- just moved out of active context.\n\n## What Changed From s05\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (watch micro-compact replace old results)\n2. 
`Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n"
},
{
"version": "s07",
"locale": "en",
"title": "s07: Task System",
"content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"Break big goals into small tasks, order them, persist to disk\"* -- a file-based task graph with dependencies, laying the foundation for multi-agent collaboration.\n\n## Problem\n\ns03's TodoManager is a flat checklist in memory: no ordering, no dependencies, no status beyond done-or-not. Real goals have structure -- task B depends on task A, tasks C and D can run in parallel, task E waits for both C and D.\n\nWithout explicit relationships, the agent can't tell what's ready, what's blocked, or what can run concurrently. And because the list lives only in memory, context compression (s06) wipes it clean.\n\n## Solution\n\nPromote the checklist into a **task graph** persisted to disk. Each task is a JSON file with status, dependencies (`blockedBy`), and dependents (`blocks`). The graph answers three questions at any moment:\n\n- **What's ready?** -- tasks with `pending` status and empty `blockedBy`.\n- **What's blocked?** -- tasks waiting on unfinished dependencies.\n- **What's done?** -- `completed` tasks, whose completion automatically unblocks dependents.\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nTask graph (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\nOrdering: task 1 must finish before 2 and 3\nParallelism: tasks 2 and 3 can run at the same time\nDependencies: task 4 waits for both 2 and 3\nStatus: pending -> in_progress -> completed\n```\n\nThis task graph becomes the coordination backbone for everything after s07: 
background execution (s08), multi-agent teams (s09+), and worktree isolation (s12) all read from and write to this same structure.\n\n## How It Works\n\n1. **TaskManager**: one JSON file per task, CRUD with dependency graph.\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. **Dependency resolution**: completing a task clears its ID from every other task's `blockedBy` list, automatically unblocking dependents.\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **Status + dependency wiring**: `update` handles transitions and dependency edges.\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. Four task tools go into the dispatch map.\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\nFrom s07 onward, the task graph is the default for multi-step work. 
s03's Todo remains for quick single-session checklists.\n\n## What Changed From s06\n\n| Component | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| Planning model | Flat checklist (in-memory) | Task graph with dependencies (on disk) |\n| Relationships | None | `blockedBy` + `blocks` edges |\n| Status tracking | Done or not | `pending` -> `in_progress` -> `completed` |\n| Persistence | Lost on compression | Survives compression and restarts |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n"
},
{
"version": "s08",
"locale": "en",
"title": "s08: Background Tasks",
"content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"Run slow operations in the background; the agent keeps thinking\"* -- daemon threads run commands, inject notifications on completion.\n\n## Problem\n\nSome commands take minutes: `npm install`, `pytest`, `docker build`. With a blocking loop, the model sits idle waiting. If the user asks \"install dependencies and while that runs, create the config file,\" the agent does them sequentially, not in parallel.\n\n## Solution\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## How It Works\n\n1. BackgroundManager tracks tasks with a thread-safe notification queue.\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` starts a daemon thread and returns immediately.\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 
When the subprocess finishes, its result goes into the notification queue.\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. The agent loop drains notifications before each LLM call.\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\" \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nThe loop stays single-threaded. Only subprocess I/O is parallelized.\n\n## What Changed From s07\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n"
},
{
"version": "s09",
"locale": "en",
"title": "s09: Agent Teams",
"content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"When the task is too big for one, delegate to teammates\"* -- persistent teammates + async mailboxes.\n\n## Problem\n\nSubagents (s04) are disposable: spawn, work, return summary, die. No identity, no memory between invocations. Background tasks (s08) run shell commands but can't make LLM-guided decisions.\n\nReal teamwork needs: (1) persistent agents that outlive a single prompt, (2) identity and lifecycle management, (3) a communication channel between agents.\n\n## Solution\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## How It Works\n\n1. TeammateManager maintains config.json with the team roster.\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` creates a teammate and starts its agent loop in a thread.\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. MessageBus: append-only JSONL inboxes. 
`send()` appends a JSON line; `read_inbox()` reads all and drains.\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. Each teammate checks its inbox before every LLM call, injecting received messages into context.\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## What Changed From s08\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. 
`Check the lead inbox for any messages`\n4. Type `/team` to see the team roster with statuses\n5. Type `/inbox` to manually check the lead's inbox\n"
},
{
"version": "s10",
"locale": "en",
"title": "s10: Team Protocols",
"content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"Teammates need shared communication rules\"* -- one request-response pattern drives all negotiation.\n\n## Problem\n\nIn s09, teammates work and communicate but lack structured coordination:\n\n**Shutdown**: Killing a thread leaves files half-written and config.json stale. You need a handshake: the lead requests, the teammate approves (finish and exit) or rejects (keep working).\n\n**Plan approval**: When the lead says \"refactor the auth module,\" the teammate starts immediately. For high-risk changes, the lead should review the plan first.\n\nBoth share the same structure: one side sends a request with a unique ID, the other responds referencing that ID.\n\n## Solution\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## How It Works\n\n1. The lead initiates shutdown by generating a request_id and sending through the inbox.\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. 
The teammate receives the request and responds with approve/reject.\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. Plan approval follows the identical pattern. The teammate submits a plan (generating a request_id), the lead reviews (referencing the same request_id).\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\nOne FSM, two applications. The same `pending -> approved | rejected` state machine handles any request-response protocol.\n\n## What Changed From s09\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. Type `/team` to monitor statuses\n"
},
{
"version": "s11",
"locale": "en",
"title": "s11: Autonomous Agents",
"content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"Teammates scan the board and claim tasks themselves\"* -- no need for the lead to assign each one.\n\n## Problem\n\nIn s09-s10, teammates only work when explicitly told to. The lead must spawn each one with a specific prompt. 10 unclaimed tasks on the board? The lead assigns each one manually. Doesn't scale.\n\nTrue autonomy: teammates scan the task board themselves, claim unclaimed tasks, work on them, then look for more.\n\nOne subtlety: after context compression (s06), the agent might forget who it is. Identity re-injection fixes this.\n\n## Solution\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## How It Works\n\n1. The teammate loop has two phases: WORK and IDLE. When the LLM stops calling tools (or calls `idle`), the teammate enters IDLE.\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 
The idle phase polls inbox and task board in a loop.\n\n```python\ndef _idle_poll(self, name, messages):\n    for _ in range(IDLE_TIMEOUT // POLL_INTERVAL):  # 60s / 5s = 12\n        time.sleep(POLL_INTERVAL)\n        inbox = BUS.read_inbox(name)\n        if inbox != \"[]\":\n            messages.append({\"role\": \"user\",\n                \"content\": f\"{inbox} \"})\n            return True\n        unclaimed = scan_unclaimed_tasks()\n        if unclaimed:\n            claim_task(unclaimed[0][\"id\"], name)\n            messages.append({\"role\": \"user\",\n                \"content\": f\"Task #{unclaimed[0]['id']}: \"\n                           f\"{unclaimed[0]['subject']} \"})\n            return True\n    return False  # timeout -> shutdown\n```\n\n3. Task board scanning: find pending, unowned, unblocked tasks.\n\n```python\ndef scan_unclaimed_tasks() -> list:\n    unclaimed = []\n    for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n        task = json.loads(f.read_text())\n        if (task.get(\"status\") == \"pending\"\n                and not task.get(\"owner\")\n                and not task.get(\"blockedBy\")):\n            unclaimed.append(task)\n    return unclaimed\n```\n\n4. Identity re-injection: when context is too short (compression happened), insert an identity block.\n\n```python\nif len(messages) <= 3:\n    messages.insert(0, {\"role\": \"user\",\n        \"content\": f\"You are '{name}', role: {role}, \"\n                   f\"team: {team_name}. Continue your work. \"})\n    messages.insert(1, {\"role\": \"assistant\",\n        \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## What Changed From s10\n\n| Component      | Before (s10)     | After (s11)                |\n|----------------|------------------|----------------------------|\n| Tools          | 12               | 14 (+idle, +claim_task)    |\n| Autonomy       | Lead-directed    | Self-organizing            |\n| Idle phase     | None             | Poll inbox + task board    |\n| Task claiming  | Manual only      | Auto-claim unclaimed tasks |\n| Identity       | System prompt    | + re-injection after compression|\n| Timeout        | None             | 60s idle -> auto shutdown  |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. 
`Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. Type `/tasks` to see the task board with owners\n5. Type `/team` to monitor who is working vs idle\n"
},
{
"version": "s12",
"locale": "en",
"title": "s12: Worktree + Task Isolation",
"content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"Each works in its own directory, no interference\"* -- tasks manage goals, worktrees manage directories, bound by ID.\n\n## Problem\n\nBy s11, agents can claim and complete tasks autonomously. But every task runs in one shared directory. Two agents refactoring different modules at the same time will collide: agent A edits `config.py`, agent B edits `config.py`, unstaged changes mix, and neither can roll back cleanly.\n\nThe task board tracks *what to do* but has no opinion about *where to do it*. The fix: give each task its own git worktree directory. Tasks manage goals, worktrees manage execution context. Bind them by task ID.\n\n## Solution\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## How It Works\n\n1. **Create a task.** Persist the goal first.\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. 
**Create a worktree and bind to the task.** Passing `task_id` auto-advances the task to `in_progress`.\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\nThe binding writes state to both sides:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **Run commands in the worktree.** `cwd` points to the isolated directory.\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **Close out.** Two choices:\n - `worktree_keep(name)` -- preserve the directory for later.\n - `worktree_remove(name, complete_task=True)` -- remove directory, complete the bound task, emit event. One call handles teardown + completion.\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **Event stream.** Every lifecycle step emits to `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\nEvents emitted: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`.\n\nAfter a crash, state reconstructs from `.tasks/` + `.worktrees/index.json` on disk. 
Conversation memory is volatile; file state is durable.\n\n## What Changed From s11\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## Try It\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n"
},
{
"version": "s01",
"locale": "zh",
"title": "s01: The Agent Loop (智能体循环)",
"content": "# s01: The Agent Loop (智能体循环)\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 一个工具 + 一个循环 = 一个智能体。\n\n## 问题\n\n语言模型能推理代码, 但碰不到真实世界 -- 不能读文件、跑测试、看报错。没有循环, 每次工具调用你都得手动把结果粘回去。你自己就是那个循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n一个退出条件控制整个流程。循环持续运行, 直到模型不再调用工具。\n\n## 工作原理\n\n1. 用户 prompt 作为第一条消息。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. 将消息和工具定义一起发给 LLM。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. 追加助手响应。检查 `stop_reason` -- 如果模型没有调用工具, 结束。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 
执行每个工具调用, 收集结果, 作为 user 消息追加。回到第 2 步。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n组装为一个完整函数:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\n不到 30 行, 这就是整个智能体。后面 11 个章节都在这个循环上叠加机制 -- 循环本身始终不变。\n\n## 变更内容\n\n| 组件 | 之前 | 之后 |\n|---------------|------------|--------------------------------|\n| Agent loop | (无) | `while True` + stop_reason |\n| Tools | (无) | `bash` (单一工具) |\n| Messages | (无) | 累积式消息列表 |\n| Control flow | (无) | `stop_reason != \"tool_use\"` |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n"
},
{
"version": "s02",
"locale": "zh",
"title": "s02: Tool Use (工具使用)",
"content": "# s02: Tool Use (工具使用)\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"加一个工具, 只加一个 handler\"* -- 循环不用动, 新工具注册进 dispatch map 就行。\n\n## 问题\n\n只有 `bash` 时, 所有操作都走 shell。`cat` 截断不可预测, `sed` 遇到特殊字符就崩, 每次 bash 调用都是不受约束的安全面。专用工具 (`read_file`, `write_file`) 可以在工具层面做路径沙箱。\n\n关键洞察: 加工具不需要改循环。\n\n## 解决方案\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 工作原理\n\n1. 每个工具有一个处理函数。路径沙箱防止逃逸工作区。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. dispatch map 将工具名映射到处理函数。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 
循环中按名称查找处理函数。循环体本身与 s01 完全一致。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\n加工具 = 加 handler + 加 schema。循环永远不变。\n\n## 相对 s01 的变更\n\n| 组件 | 之前 (s01) | 之后 (s02) |\n|----------------|--------------------|--------------------------------|\n| Tools | 1 (仅 bash) | 4 (bash, read, write, edit) |\n| Dispatch | 硬编码 bash 调用 | `TOOL_HANDLERS` 字典 |\n| 路径安全 | 无 | `safe_path()` 沙箱 |\n| Agent loop | 不变 | 不变 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n"
},
{
"version": "s03",
"locale": "zh",
"title": "s03: TodoWrite (待办写入)",
"content": "# s03: TodoWrite (待办写入)\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"没有计划的 agent 走哪算哪\"* -- 先列步骤再动手, 完成率翻倍。\n\n## 问题\n\n多步任务中, 模型会丢失进度 -- 重复做过的事、跳步、跑偏。对话越长越严重: 工具结果不断填满上下文, 系统提示的影响力逐渐被稀释。一个 10 步重构可能做完 1-3 步就开始即兴发挥, 因为 4-10 步已经被挤出注意力了。\n\n## 解决方案\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 工作原理\n\n1. TodoManager 存储带状态的项目。同一时间只允许一个 `in_progress`。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo` 工具和其他工具一样加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nag reminder: 模型连续 3 轮以上不调用 `todo` 时注入提醒。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos. 
\",\n })\n```\n\n\"同时只能有一个 in_progress\" 强制顺序聚焦。nag reminder 制造问责压力 -- 你不更新计划, 系统就追着你问。\n\n## 相对 s02 的变更\n\n| 组件 | 之前 (s02) | 之后 (s03) |\n|----------------|------------------|--------------------------------|\n| Tools | 4 | 5 (+todo) |\n| 规划 | 无 | 带状态的 TodoManager |\n| Nag 注入 | 无 | 3 轮后注入 `` |\n| Agent loop | 简单分发 | + rounds_since_todo 计数器 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n"
},
{
"version": "s04",
"locale": "zh",
"title": "s04: Subagents (子智能体)",
"content": "# s04: Subagents (子智能体)\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大任务拆小, 每个小任务干净的上下文\"* -- 子智能体用独立 messages[], 不污染主对话。\n\n## 问题\n\n智能体工作越久, messages 数组越胖。每次读文件、跑命令的输出都永久留在上下文里。\"这个项目用什么测试框架?\" 可能要读 5 个文件, 但父智能体只需要一个词: \"pytest。\"\n\n## 解决方案\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 工作原理\n\n1. 父智能体有一个 `task` 工具。子智能体拥有除 `task` 外的所有基础工具 (禁止递归生成)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. 
子智能体以 `messages=[]` 启动, 运行自己的循环。只有最终文本返回给父智能体。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n子智能体可能跑了 30+ 次工具调用, 但整个消息历史直接丢弃。父智能体收到的只是一段摘要文本, 作为普通 `tool_result` 返回。\n\n## 相对 s03 的变更\n\n| 组件 | 之前 (s03) | 之后 (s04) |\n|----------------|------------------|-------------------------------|\n| Tools | 5 | 5 (基础) + task (仅父端) |\n| 上下文 | 单一共享 | 父 + 子隔离 |\n| Subagent | 无 | `run_subagent()` 函数 |\n| 返回值 | 不适用 | 仅摘要文本 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n"
},
{
"version": "s05",
"locale": "zh",
"title": "s05: Skills (技能加载)",
"content": "# s05: Skills (技能加载)\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"用到什么知识, 临时加载什么知识\"* -- 通过 tool_result 注入, 不塞 system prompt。\n\n## 问题\n\n你希望智能体遵循特定领域的工作流: git 约定、测试模式、代码审查清单。全塞进系统提示太浪费 -- 10 个技能, 每个 2000 token, 就是 20,000 token, 大部分跟当前任务毫无关系。\n\n## 解决方案\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第一层: 系统提示中放技能名称 (低成本)。第二层: tool_result 中按需放完整内容。\n\n## 工作原理\n\n1. 每个技能是一个目录, 包含 `SKILL.md` 文件和 YAML frontmatter。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoader 递归扫描 `SKILL.md` 文件, 用目录名作为技能标识。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n \"\n```\n\n3. 
第一层写入系统提示。第二层不过是 dispatch map 中的又一个工具。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\n模型知道有哪些技能 (便宜), 需要时再加载完整内容 (贵)。\n\n## 相对 s04 的变更\n\n| 组件 | 之前 (s04) | 之后 (s05) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 (基础 + task) | 5 (基础 + load_skill) |\n| 系统提示 | 静态字符串 | + 技能描述列表 |\n| 知识库 | 无 | skills/\\*/SKILL.md 文件 |\n| 注入方式 | 无 | 两层 (系统提示 + result) |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n"
},
{
"version": "s06",
"locale": "zh",
"title": "s06: Context Compact (上下文压缩)",
"content": "# s06: Context Compact (上下文压缩)\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"上下文总会满, 要有办法腾地方\"* -- 三层压缩策略, 换来无限会话。\n\n## 问题\n\n上下文窗口是有限的。读一个 1000 行的文件就吃掉 ~4000 token; 读 30 个文件、跑 20 条命令, 轻松突破 100k token。不压缩, 智能体根本没法在大项目里干活。\n\n## 解决方案\n\n三层压缩, 激进程度递增:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 工作原理\n\n1. **第一层 -- micro_compact**: 每次 LLM 调用前, 将旧的 tool result 替换为占位符。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**第二层 -- auto_compact**: token 超过阈值时, 保存完整对话到磁盘, 让 LLM 做摘要。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第三层 -- manual compact**: `compact` 工具按需触发同样的摘要机制。\n\n4. 循环整合三层:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\n完整历史通过 transcript 保存在磁盘上。信息没有真正丢失, 只是移出了活跃上下文。\n\n## 相对 s05 的变更\n\n| 组件 | 之前 (s05) | 之后 (s06) |\n|----------------|------------------|--------------------------------|\n| Tools | 5 | 5 (基础 + compact) |\n| 上下文管理 | 无 | 三层压缩 |\n| Micro-compact | 无 | 旧结果 -> 占位符 |\n| Auto-compact | 无 | token 阈值触发 |\n| Transcripts | 无 | 保存到 .transcripts/ |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Read every Python file in the agents/ directory one by one` (观察 micro-compact 替换旧结果)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n"
},
{
"version": "s07",
"locale": "zh",
"title": "s07: Task System (任务系统)",
"content": "# s07: Task System (任务系统)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大目标要拆成小任务, 排好序, 记在磁盘上\"* -- 文件持久化的任务图, 为多 agent 协作打基础。\n\n## 问题\n\ns03 的 TodoManager 只是内存中的扁平清单: 没有顺序、没有依赖、状态只有做完没做完。真实目标是有结构的 -- 任务 B 依赖任务 A, 任务 C 和 D 可以并行, 任务 E 要等 C 和 D 都完成。\n\n没有显式的关系, 智能体分不清什么能做、什么被卡住、什么能同时跑。而且清单只活在内存里, 上下文压缩 (s06) 一跑就没了。\n\n## 解决方案\n\n把扁平清单升级为持久化到磁盘的**任务图**。每个任务是一个 JSON 文件, 有状态、前置依赖 (`blockedBy`) 和后置依赖 (`blocks`)。任务图随时回答三个问题:\n\n- **什么可以做?** -- 状态为 `pending` 且 `blockedBy` 为空的任务。\n- **什么被卡住?** -- 等待前置任务完成的任务。\n- **什么做完了?** -- 状态为 `completed` 的任务, 完成时自动解锁后续任务。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\n任务图 (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n顺序: task 1 必须先完成, 才能开始 2 和 3\n并行: task 2 和 3 可以同时执行\n依赖: task 4 要等 2 和 3 都完成\n状态: pending -> in_progress -> completed\n```\n\n这个任务图是 s07 之后所有机制的协调骨架: 后台执行 (s08)、多 agent 团队 (s09+)、worktree 隔离 (s12) 都读写这同一个结构。\n\n## 工作原理\n\n1. **TaskManager**: 每个任务一个 JSON 文件, CRUD + 依赖图。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. 
**依赖解除**: 完成任务时, 自动将其 ID 从其他任务的 `blockedBy` 中移除, 解锁后续任务。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **状态变更 + 依赖关联**: `update` 处理状态转换和依赖边。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 四个任务工具加入 dispatch map。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\n从 s07 起, 任务图是多步工作的默认选择。s03 的 Todo 仍可用于单次会话内的快速清单。\n\n## 相对 s06 的变更\n\n| 组件 | 之前 (s06) | 之后 (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 规划模型 | 扁平清单 (仅内存) | 带依赖关系的任务图 (磁盘) |\n| 关系 | 无 | `blockedBy` + `blocks` 边 |\n| 状态追踪 | 做完没做完 | `pending` -> `in_progress` -> `completed` |\n| 持久化 | 压缩后丢失 | 压缩和重启后存活 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n"
},
{
"version": "s08",
"locale": "zh",
"title": "s08: Background Tasks (后台任务)",
"content": "# s08: Background Tasks (后台任务)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"慢操作丢后台, agent 继续想下一步\"* -- 后台线程跑命令, 完成后注入通知。\n\n## 问题\n\n有些命令要跑好几分钟: `npm install`、`pytest`、`docker build`。阻塞式循环下模型只能干等。用户说 \"装依赖, 顺便建个配置文件\", 智能体却只能一个一个来。\n\n## 解决方案\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 工作原理\n\n1. BackgroundManager 用线程安全的通知队列追踪任务。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()` 启动守护线程, 立即返回。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. 子进程完成后, 结果进入通知队列。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. 
每次 LLM 调用前排空通知队列。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\" \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\n循环保持单线程。只有子进程 I/O 被并行化。\n\n## 相对 s07 的变更\n\n| 组件 | 之前 (s07) | 之后 (s08) |\n|----------------|------------------|------------------------------------|\n| Tools | 8 | 6 (基础 + background_run + check) |\n| 执行方式 | 仅阻塞 | 阻塞 + 后台线程 |\n| 通知机制 | 无 | 每轮排空的队列 |\n| 并发 | 无 | 守护线程 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n"
},
{
"version": "s09",
"locale": "zh",
"title": "s09: Agent Teams (智能体团队)",
"content": "# s09: Agent Teams (智能体团队)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"任务太大一个人干不完, 要能分给队友\"* -- 持久化队友 + JSONL 邮箱。\n\n## 问题\n\n子智能体 (s04) 是一次性的: 生成、干活、返回摘要、消亡。没有身份, 没有跨调用的记忆。后台任务 (s08) 能跑 shell 命令, 但做不了 LLM 引导的决策。\n\n真正的团队协作需要三样东西: (1) 能跨多轮对话存活的持久智能体, (2) 身份和生命周期管理, (3) 智能体之间的通信通道。\n\n## 解决方案\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 工作原理\n\n1. TeammateManager 通过 config.json 维护团队名册。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()` 创建队友并在线程中启动 agent loop。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. 
MessageBus: append-only 的 JSONL 收件箱。`send()` 追加一行; `read_inbox()` 读取全部并清空。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 每个队友在每次 LLM 调用前检查收件箱, 将消息注入上下文。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## 相对 s08 的变更\n\n| 组件 | 之前 (s08) | 之后 (s09) |\n|----------------|------------------|------------------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| 智能体数量 | 单一 | 领导 + N 个队友 |\n| 持久化 | 无 | config.json + JSONL 收件箱 |\n| 线程 | 后台命令 | 每线程完整 agent loop |\n| 生命周期 | 一次性 | idle -> working -> idle |\n| 通信 | 无 | message + broadcast |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. 输入 `/team` 查看团队名册和状态\n5. 输入 `/inbox` 手动检查领导的收件箱\n"
},
{
"version": "s10",
"locale": "zh",
"title": "s10: Team Protocols (团队协议)",
"content": "# s10: Team Protocols (团队协议)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"队友之间要有统一的沟通规矩\"* -- 一个 request-response 模式驱动所有协商。\n\n## 问题\n\ns09 中队友能干活能通信, 但缺少结构化协调:\n\n**关机**: 直接杀线程会留下写了一半的文件和过期的 config.json。需要握手 -- 领导请求, 队友批准 (收尾退出) 或拒绝 (继续干)。\n\n**计划审批**: 领导说 \"重构认证模块\", 队友立刻开干。高风险变更应该先过审。\n\n两者结构一样: 一方发带唯一 ID 的请求, 另一方引用同一 ID 响应。\n\n## 解决方案\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 工作原理\n\n1. 领导生成 request_id, 通过收件箱发起关机请求。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. 队友收到请求后, 用 approve/reject 响应。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. 
计划审批遵循完全相同的模式。队友提交计划 (生成 request_id), 领导审查 (引用同一个 request_id)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n一个 FSM, 两种用途。同样的 `pending -> approved | rejected` 状态机可以套用到任何请求-响应协议上。\n\n## 相对 s09 的变更\n\n| 组件 | 之前 (s09) | 之后 (s10) |\n|----------------|------------------|--------------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan) |\n| 关机 | 仅自然退出 | 请求-响应握手 |\n| 计划门控 | 无 | 提交/审查与审批 |\n| 关联 | 无 | 每个请求一个 request_id |\n| FSM | 无 | pending -> approved/rejected |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. 输入 `/team` 监控状态\n"
},
{
"version": "s11",
"locale": "zh",
"title": "s11: Autonomous Agents (自治智能体)",
"content": "# s11: Autonomous Agents (自治智能体)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"队友自己看看板, 有活就认领\"* -- 不需要领导逐个分配, 自组织。\n\n## 问题\n\ns09-s10 中, 队友只在被明确指派时才动。领导得给每个队友写 prompt, 任务看板上 10 个未认领的任务得手动分配。这扩展不了。\n\n真正的自治: 队友自己扫描任务看板, 认领没人做的任务, 做完再找下一个。\n\n一个细节: 上下文压缩 (s06) 后智能体可能忘了自己是谁。身份重注入解决这个问题。\n\n## 解决方案\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 工作原理\n\n1. 队友循环分两个阶段: WORK 和 IDLE。LLM 停止调用工具 (或调用了 `idle`) 时, 进入 IDLE。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 
空闲阶段循环轮询收件箱和任务看板。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']} \"})\n return True\n return False # timeout -> shutdown\n```\n\n3. 任务看板扫描: 找 pending 状态、无 owner、未被阻塞的任务。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. 身份重注入: 上下文过短 (说明发生了压缩) 时, 在开头插入身份块。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work. \"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## 相对 s10 的变更\n\n| 组件 | 之前 (s10) | 之后 (s11) |\n|----------------|------------------|----------------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| 自治性 | 领导指派 | 自组织 |\n| 空闲阶段 | 无 | 轮询收件箱 + 任务看板 |\n| 任务认领 | 仅手动 | 自动认领未分配任务 |\n| 身份 | 系统提示 | + 压缩后重注入 |\n| 超时 | 无 | 60 秒空闲 -> 自动关机 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. `Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. 输入 `/tasks` 查看带 owner 的任务看板\n5. 输入 `/team` 监控谁在工作、谁在空闲\n"
},
{
"version": "s12",
"locale": "zh",
"title": "s12: Worktree + Task Isolation (Worktree 任务隔离)",
"content": "# s12: Worktree + Task Isolation (Worktree 任务隔离)\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各干各的目录, 互不干扰\"* -- 任务管目标, worktree 管目录, 按 ID 绑定。\n\n## 问题\n\n到 s11, 智能体已经能自主认领和完成任务。但所有任务共享一个目录。两个智能体同时重构不同模块 -- A 改 `config.py`, B 也改 `config.py`, 未提交的改动互相污染, 谁也没法干净回滚。\n\n任务板管 \"做什么\" 但不管 \"在哪做\"。解法: 给每个任务一个独立的 git worktree 目录, 用任务 ID 把两边关联起来。\n\n## 解决方案\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 工作原理\n\n1. **创建任务。** 先把目标持久化。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **创建 worktree 并绑定任务。** 传入 `task_id` 自动将任务推进到 `in_progress`。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n绑定同时写入两侧状态:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. **在 worktree 中执行命令。** `cwd` 指向隔离目录。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. 
**收尾。** 两种选择:\n - `worktree_keep(name)` -- 保留目录供后续使用。\n - `worktree_remove(name, complete_task=True)` -- 删除目录, 完成绑定任务, 发出事件。一个调用搞定拆除 + 完成。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **事件流。** 每个生命周期步骤写入 `.worktrees/events.jsonl`:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n事件类型: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\n崩溃后从 `.tasks/` + `.worktrees/index.json` 重建现场。会话记忆是易失的; 磁盘状态是持久的。\n\n## 相对 s11 的变更\n\n| 组件 | 之前 (s11) | 之后 (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| 协调 | 任务板 (owner/status) | 任务板 + worktree 显式绑定 |\n| 执行范围 | 共享目录 | 每个任务独立目录 |\n| 可恢复性 | 仅任务状态 | 任务状态 + worktree 索引 |\n| 收尾 | 任务完成 | 任务完成 + 显式 keep/remove |\n| 生命周期可见性 | 隐式日志 | `.worktrees/events.jsonl` 显式事件流 |\n\n## 试一试\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n试试这些 prompt (英文 prompt 对 LLM 效果更好, 也可以用中文):\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. `Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n"
},
{
"version": "s01",
"locale": "ja",
"title": "s01: The Agent Loop",
"content": "# s01: The Agent Loop\n\n`[ s01 ] s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"One loop & Bash is all you need\"* -- 1つのツール + 1つのループ = エージェント。\n\n## 問題\n\n言語モデルはコードについて推論できるが、現実世界に触れられない。ファイルを読めず、テストを実行できず、エラーを確認できない。ループがなければ、ツール呼び出しのたびにユーザーが手動で結果をコピーペーストする必要がある。つまりユーザー自身がループになる。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tool |\n| prompt | | | | execute |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n (loop until stop_reason != \"tool_use\")\n```\n\n1つの終了条件がフロー全体を制御する。モデルがツール呼び出しを止めるまでループが回り続ける。\n\n## 仕組み\n\n1. ユーザーのプロンプトが最初のメッセージになる。\n\n```python\nmessages.append({\"role\": \"user\", \"content\": query})\n```\n\n2. メッセージとツール定義をLLMに送信する。\n\n```python\nresponse = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n)\n```\n\n3. アシスタントのレスポンスを追加し、`stop_reason`を確認する。ツールが呼ばれなければ終了。\n\n```python\nmessages.append({\"role\": \"assistant\", \"content\": response.content})\nif response.stop_reason != \"tool_use\":\n return\n```\n\n4. 
各ツール呼び出しを実行し、結果を収集してuserメッセージとして追加。ステップ2に戻る。\n\n```python\nresults = []\nfor block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\nmessages.append({\"role\": \"user\", \"content\": results})\n```\n\n1つの関数にまとめると:\n\n```python\ndef agent_loop(query):\n messages = [{\"role\": \"user\", \"content\": query}]\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = run_bash(block.input[\"command\"])\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n messages.append({\"role\": \"user\", \"content\": results})\n```\n\nこれでエージェント全体が30行未満に収まる。本コースの残りはすべてこのループの上に積み重なる -- ループ自体は変わらない。\n\n## 変更点\n\n| Component | Before | After |\n|---------------|------------|--------------------------------|\n| Agent loop | (none) | `while True` + stop_reason |\n| Tools | (none) | `bash` (one tool) |\n| Messages | (none) | Accumulating list |\n| Control flow | (none) | `stop_reason != \"tool_use\"` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s01_agent_loop.py\n```\n\n1. `Create a file called hello.py that prints \"Hello, World!\"`\n2. `List all Python files in this directory`\n3. `What is the current git branch?`\n4. `Create a directory called test_output and write 3 files in it`\n"
},
{
"version": "s02",
"locale": "ja",
"title": "s02: Tool Use",
"content": "# s02: Tool Use\n\n`s01 > [ s02 ] s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"ツールを足すなら、ハンドラーを1つ足すだけ\"* -- ループは変わらない。新ツールは dispatch map に登録するだけ。\n\n## 問題\n\n`bash`だけでは、エージェントは何でもシェル経由で行う。`cat`は予測不能に切り詰め、`sed`は特殊文字で壊れ、すべてのbash呼び出しが制約のないセキュリティ面になる。`read_file`や`write_file`のような専用ツールなら、ツールレベルでパスのサンドボックス化を強制できる。\n\n重要な点: ツールを追加してもループの変更は不要。\n\n## 解決策\n\n```\n+--------+ +-------+ +------------------+\n| User | ---> | LLM | ---> | Tool Dispatch |\n| prompt | | | | { |\n+--------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +-----------+ edit: run_edit |\n tool_result | } |\n +------------------+\n\nThe dispatch map is a dict: {tool_name: handler_function}.\nOne lookup replaces any if/elif chain.\n```\n\n## 仕組み\n\n1. 各ツールにハンドラ関数を定義する。パスのサンドボックス化でワークスペース外への脱出を防ぐ。\n\n```python\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_read(path: str, limit: int = None) -> str:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit]\n return \"\\n\".join(lines)[:50000]\n```\n\n2. ディスパッチマップがツール名とハンドラを結びつける。\n\n```python\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"],\n kw[\"new_text\"]),\n}\n```\n\n3. 
ループ内で名前によりハンドラをルックアップする。ループ本体はs01から不変。\n\n```python\nfor block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler \\\n else f\"Unknown tool: {block.name}\"\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": output,\n })\n```\n\nツール追加 = ハンドラ追加 + スキーマ追加。ループは決して変わらない。\n\n## s01からの変更点\n\n| Component | Before (s01) | After (s02) |\n|----------------|--------------------|----------------------------|\n| Tools | 1 (bash only) | 4 (bash, read, write, edit)|\n| Dispatch | Hardcoded bash call | `TOOL_HANDLERS` dict |\n| Path safety | None | `safe_path()` sandbox |\n| Agent loop | Unchanged | Unchanged |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s02_tool_use.py\n```\n\n1. `Read the file requirements.txt`\n2. `Create a file called greet.py with a greet(name) function`\n3. `Edit greet.py to add a docstring to the function`\n4. `Read greet.py to verify the edit worked`\n"
},
{
"version": "s03",
"locale": "ja",
"title": "s03: TodoWrite",
"content": "# s03: TodoWrite\n\n`s01 > s02 > [ s03 ] s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"計画のないエージェントは行き当たりばったり\"* -- まずステップを書き出し、それから実行。\n\n## 問題\n\nマルチステップのタスクで、モデルは途中で迷子になる。作業を繰り返したり、ステップを飛ばしたり、脱線したりする。長い会話になるほど悪化する -- ツール結果がコンテキストを埋めるにつれ、システムプロンプトの影響力が薄れる。10ステップのリファクタリングでステップ1-3を完了した後、残りを忘れて即興を始めてしまう。\n\n## 解決策\n\n```\n+--------+ +-------+ +---------+\n| User | ---> | LLM | ---> | Tools |\n| prompt | | | | + todo |\n+--------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +----------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject into tool_result\n```\n\n## 仕組み\n\n1. TodoManagerはアイテムのリストをステータス付きで保持する。`in_progress`にできるのは同時に1つだけ。\n\n```python\nclass TodoManager:\n def update(self, items: list) -> str:\n validated, in_progress_count = [], 0\n for item in items:\n status = item.get(\"status\", \"pending\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item[\"id\"], \"text\": item[\"text\"],\n \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress\")\n self.items = validated\n return self.render()\n```\n\n2. `todo`ツールは他のツールと同様にディスパッチマップに追加される。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n```\n\n3. nagリマインダーが、モデルが3ラウンド以上`todo`を呼ばなかった場合にナッジを注入する。\n\n```python\nif rounds_since_todo >= 3 and messages:\n last = messages[-1]\n if last[\"role\"] == \"user\" and isinstance(last.get(\"content\"), list):\n last[\"content\"].insert(0, {\n \"type\": \"text\",\n \"text\": \"Update your todos. 
\",\n })\n```\n\n「一度にin_progressは1つだけ」の制約が逐次的な集中を強制し、nagリマインダーが説明責任を生む。\n\n## s02からの変更点\n\n| Component | Before (s02) | After (s03) |\n|----------------|------------------|----------------------------|\n| Tools | 4 | 5 (+todo) |\n| Planning | None | TodoManager with statuses |\n| Nag injection | None | `` after 3 rounds|\n| Agent loop | Simple dispatch | + rounds_since_todo counter|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s03_todo_write.py\n```\n\n1. `Refactor the file hello.py: add type hints, docstrings, and a main guard`\n2. `Create a Python package with __init__.py, utils.py, and tests/test_utils.py`\n3. `Review all Python files and fix any style issues`\n"
},
{
"version": "s04",
"locale": "ja",
"title": "s04: Subagents",
"content": "# s04: Subagents\n\n`s01 > s02 > s03 > [ s04 ] s05 > s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"大きなタスクを分割し、各サブタスクにクリーンなコンテキストを\"* -- サブエージェントは独立した messages[] を使い、メイン会話を汚さない。\n\n## 問題\n\nエージェントが作業するにつれ、messages配列は膨張し続ける。すべてのファイル読み取り、すべてのbash出力がコンテキストに永久に残る。「このプロジェクトはどのテストフレームワークを使っているか」という質問は5つのファイルを読む必要があるかもしれないが、親に必要なのは「pytest」という答えだけだ。\n\n## 解決策\n\n```\nParent agent Subagent\n+------------------+ +------------------+\n| messages=[...] | | messages=[] | <-- fresh\n| | dispatch | |\n| tool: task | ----------> | while tool_use: |\n| prompt=\"...\" | | call tools |\n| | summary | append results |\n| result = \"...\" | <---------- | return last text |\n+------------------+ +------------------+\n\nParent context stays clean. Subagent context is discarded.\n```\n\n## 仕組み\n\n1. 親に`task`ツールを追加する。子は`task`を除くすべての基本ツールを取得する(再帰的な生成は不可)。\n\n```python\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\",\n \"description\": \"Spawn a subagent with fresh context.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"prompt\": {\"type\": \"string\"}},\n \"required\": [\"prompt\"],\n }},\n]\n```\n\n2. 
サブエージェントは`messages=[]`で開始し、自身のループを実行する。最終テキストだけが親に返る。\n\n```python\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM,\n messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\",\n \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input)\n results.append({\"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n return \"\".join(\n b.text for b in response.content if hasattr(b, \"text\")\n ) or \"(no summary)\"\n```\n\n子のメッセージ履歴全体(30回以上のツール呼び出し)は破棄される。親は1段落の要約を通常の`tool_result`として受け取る。\n\n## s03からの変更点\n\n| Component | Before (s03) | After (s04) |\n|----------------|------------------|---------------------------|\n| Tools | 5 | 5 (base) + task (parent) |\n| Context | Single shared | Parent + child isolation |\n| Subagent | None | `run_subagent()` function |\n| Return value | N/A | Summary text only |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s04_subagent.py\n```\n\n1. `Use a subtask to find what testing framework this project uses`\n2. `Delegate: read all .py files and summarize what each one does`\n3. `Use a task to create a new module, then verify it from here`\n"
},
{
"version": "s05",
"locale": "ja",
"title": "s05: Skills",
"content": "# s05: Skills\n\n`s01 > s02 > s03 > s04 > [ s05 ] s06 | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"必要な知識を、必要な時に読み込む\"* -- system prompt ではなく tool_result で注入。\n\n## 問題\n\nエージェントにドメイン固有のワークフローを遵守させたい: gitの規約、テストパターン、コードレビューチェックリスト。すべてをシステムプロンプトに入れると、使われないスキルにトークンを浪費する。10スキル x 2000トークン = 20,000トークン、ほとんどが任意のタスクに無関係だ。\n\n## 解決策\n\n```\nSystem prompt (Layer 1 -- always present):\n+--------------------------------------+\n| You are a coding agent. |\n| Skills available: |\n| - git: Git workflow helpers | ~100 tokens/skill\n| - test: Testing best practices |\n+--------------------------------------+\n\nWhen model calls load_skill(\"git\"):\n+--------------------------------------+\n| tool_result (Layer 2 -- on demand): |\n| |\n| Full git workflow instructions... | ~2000 tokens\n| Step 1: ... |\n| |\n+--------------------------------------+\n```\n\n第1層: スキル*名*をシステムプロンプトに(低コスト)。第2層: スキル*本体*をtool_resultに(オンデマンド)。\n\n## 仕組み\n\n1. 各スキルは `SKILL.md` ファイルを含むディレクトリとして配置される。\n\n```\nskills/\n pdf/\n SKILL.md # ---\\n name: pdf\\n description: Process PDF files\\n ---\\n ...\n code-review/\n SKILL.md # ---\\n name: code-review\\n description: Review code\\n ---\\n ...\n```\n\n2. SkillLoaderが `SKILL.md` を再帰的に探索し、ディレクトリ名をスキル識別子として使用する。\n\n```python\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills = {}\n for f in sorted(skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body}\n\n def get_descriptions(self) -> str:\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"\")\n lines.append(f\" - {name}: {desc}\")\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'.\"\n return f\"\\n{skill['body']}\\n \"\n```\n\n3. 
第1層はシステムプロンプトに配置。第2層は通常のツールハンドラ。\n\n```python\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\nTOOL_HANDLERS = {\n # ...base tools...\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n```\n\nモデルはどのスキルが存在するかを知り(低コスト)、関連する時にだけ読み込む(高コスト)。\n\n## s04からの変更点\n\n| Component | Before (s04) | After (s05) |\n|----------------|------------------|----------------------------|\n| Tools | 5 (base + task) | 5 (base + load_skill) |\n| System prompt | Static string | + skill descriptions |\n| Knowledge | None | skills/\\*/SKILL.md files |\n| Injection | None | Two-layer (system + result)|\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s05_skill_loading.py\n```\n\n1. `What skills are available?`\n2. `Load the agent-builder skill and follow its instructions`\n3. `I need to do a code review -- load the relevant skill first`\n4. `Build an MCP server using the mcp-builder skill`\n"
},
{
"version": "s06",
"locale": "ja",
"title": "s06: Context Compact",
"content": "# s06: Context Compact\n\n`s01 > s02 > s03 > s04 > s05 > [ s06 ] | s07 > s08 > s09 > s10 > s11 > s12`\n\n> *\"コンテキストはいつか溢れる、空ける手段が要る\"* -- 3層圧縮で無限セッションを実現。\n\n## 問題\n\nコンテキストウィンドウは有限だ。1000行のファイルに対する`read_file`1回で約4000トークンを消費する。30ファイルを読み20回のbashコマンドを実行すると、100,000トークン超。圧縮なしでは、エージェントは大規模コードベースで作業できない。\n\n## 解決策\n\n積極性を段階的に上げる3層構成:\n\n```\nEvery turn:\n+------------------+\n| Tool call result |\n+------------------+\n |\n v\n[Layer 1: micro_compact] (silent, every turn)\n Replace tool_result > 3 turns old\n with \"[Previous: used {tool_name}]\"\n |\n v\n[Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\ncontinue [Layer 2: auto_compact]\n Save transcript to .transcripts/\n LLM summarizes conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact explicitly.\n Same summarization as auto_compact.\n```\n\n## 仕組み\n\n1. **第1層 -- micro_compact**: 各LLM呼び出しの前に、古いツール結果をプレースホルダーに置換する。\n\n```python\ndef micro_compact(messages: list) -> list:\n tool_results = []\n for i, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for j, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((i, j, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n for _, _, part in tool_results[:-KEEP_RECENT]:\n if len(part.get(\"content\", \"\")) > 100:\n part[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n```\n\n2. 
**第2層 -- auto_compact**: トークンが閾値を超えたら、完全なトランスクリプトをディスクに保存し、LLMに要約を依頼する。\n\n```python\ndef auto_compact(messages: list) -> list:\n # Save transcript for recovery\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n # LLM summarizes\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity...\"\n + json.dumps(messages, default=str)[:80000]}],\n max_tokens=2000,\n )\n return [\n {\"role\": \"user\", \"content\": f\"[Compressed]\\n\\n{response.content[0].text}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. Continuing.\"},\n ]\n```\n\n3. **第3層 -- manual compact**: `compact`ツールが同じ要約処理をオンデマンドでトリガーする。\n\n4. ループが3層すべてを統合する:\n\n```python\ndef agent_loop(messages: list):\n while True:\n micro_compact(messages) # Layer 1\n if estimate_tokens(messages) > THRESHOLD:\n messages[:] = auto_compact(messages) # Layer 2\n response = client.messages.create(...)\n # ... tool execution ...\n if manual_compact:\n messages[:] = auto_compact(messages) # Layer 3\n```\n\nトランスクリプトがディスク上に完全な履歴を保持する。何も真に失われず、アクティブなコンテキストの外に移動されるだけ。\n\n## s05からの変更点\n\n| Component | Before (s05) | After (s06) |\n|----------------|------------------|----------------------------|\n| Tools | 5 | 5 (base + compact) |\n| Context mgmt | None | Three-layer compression |\n| Micro-compact | None | Old results -> placeholders|\n| Auto-compact | None | Token threshold trigger |\n| Transcripts | None | Saved to .transcripts/ |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s06_context_compact.py\n```\n\n1. `Read every Python file in the agents/ directory one by one` (micro-compactが古い結果を置換するのを観察する)\n2. `Keep reading files until compression triggers automatically`\n3. `Use the compact tool to manually compress the conversation`\n"
},
{
"version": "s07",
"locale": "ja",
"title": "s07: Task System",
"content": "# s07: Task System\n\n`s01 > s02 > s03 > s04 > s05 > s06 | [ s07 ] s08 > s09 > s10 > s11 > s12`\n\n> *\"大きな目標を小タスクに分解し、順序付けし、ディスクに記録する\"* -- ファイルベースのタスクグラフ、マルチエージェント協調の基盤。\n\n## 問題\n\ns03のTodoManagerはメモリ上のフラットなチェックリストに過ぎない: 順序なし、依存関係なし、ステータスは完了か未完了のみ。実際の目標には構造がある -- タスクBはタスクAに依存し、タスクCとDは並行実行でき、タスクEはCとDの両方を待つ。\n\n明示的な関係がなければ、エージェントは何が実行可能で、何がブロックされ、何が同時に走れるかを判断できない。しかもリストはメモリ上にしかないため、コンテキスト圧縮(s06)で消える。\n\n## 解決策\n\nフラットなチェックリストをディスクに永続化する**タスクグラフ**に昇格させる。各タスクは1つのJSONファイルで、ステータス・前方依存(`blockedBy`)・後方依存(`blocks`)を持つ。タスクグラフは常に3つの問いに答える:\n\n- **何が実行可能か?** -- `pending`ステータスで`blockedBy`が空のタスク。\n- **何がブロックされているか?** -- 未完了の依存を待つタスク。\n- **何が完了したか?** -- `completed`のタスク。完了時に後続タスクを自動的にアンブロックする。\n\n```\n.tasks/\n task_1.json {\"id\":1, \"status\":\"completed\"}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\"}\n task_3.json {\"id\":3, \"blockedBy\":[1], \"status\":\"pending\"}\n task_4.json {\"id\":4, \"blockedBy\":[2,3], \"status\":\"pending\"}\n\nタスクグラフ (DAG):\n +----------+\n +--> | task 2 | --+\n | | pending | |\n+----------+ +----------+ +--> +----------+\n| task 1 | | task 4 |\n| completed| --> +----------+ +--> | blocked |\n+----------+ | task 3 | --+ +----------+\n | pending |\n +----------+\n\n順序: task 1 は 2 と 3 より先に完了する必要がある\n並行: task 2 と 3 は同時に実行できる\n依存: task 4 は 2 と 3 の両方を待つ\nステータス: pending -> in_progress -> completed\n```\n\nこのタスクグラフは s07 以降の全メカニズムの協調バックボーンとなる: バックグラウンド実行(s08)、マルチエージェントチーム(s09+)、worktree分離(s12)はすべてこの同じ構造を読み書きする。\n\n## 仕組み\n\n1. **TaskManager**: タスクごとに1つのJSONファイル、依存グラフ付きCRUD。\n\n```python\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def create(self, subject, description=\"\"):\n task = {\"id\": self._next_id, \"subject\": subject,\n \"status\": \"pending\", \"blockedBy\": [],\n \"blocks\": [], \"owner\": \"\"}\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n```\n\n2. 
**依存解除**: タスク完了時に、他タスクの`blockedBy`リストから完了IDを除去し、後続タスクをアンブロックする。\n\n```python\ndef _clear_dependency(self, completed_id):\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n```\n\n3. **ステータス遷移 + 依存配線**: `update`がステータス変更と依存エッジを担う。\n\n```python\ndef update(self, task_id, status=None,\n add_blocked_by=None, add_blocks=None):\n task = self._load(task_id)\n if status:\n task[\"status\"] = status\n if status == \"completed\":\n self._clear_dependency(task_id)\n self._save(task)\n```\n\n4. 4つのタスクツールをディスパッチマップに追加する。\n\n```python\nTOOL_HANDLERS = {\n # ...base tools...\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n```\n\ns07以降、タスクグラフがマルチステップ作業のデフォルト。s03のTodoは軽量な単一セッション用チェックリストとして残る。\n\n## s06からの変更点\n\n| コンポーネント | Before (s06) | After (s07) |\n|---|---|---|\n| Tools | 5 | 8 (`task_create/update/list/get`) |\n| 計画モデル | フラットチェックリスト (メモリ) | 依存関係付きタスクグラフ (ディスク) |\n| 関係 | なし | `blockedBy` + `blocks` エッジ |\n| ステータス追跡 | 完了か未完了 | `pending` -> `in_progress` -> `completed` |\n| 永続性 | 圧縮で消失 | 圧縮・再起動後も存続 |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s07_task_system.py\n```\n\n1. `Create 3 tasks: \"Setup project\", \"Write code\", \"Write tests\". Make them depend on each other in order.`\n2. `List all tasks and show the dependency graph`\n3. `Complete task 1 and then list tasks to see task 2 unblocked`\n4. `Create a task board for refactoring: parse -> transform -> emit -> test, where transform and emit can run in parallel after parse`\n"
},
{
"version": "s08",
"locale": "ja",
"title": "s08: Background Tasks",
"content": "# s08: Background Tasks\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > [ s08 ] s09 > s10 > s11 > s12`\n\n> *\"遅い操作はバックグラウンドへ、エージェントは次を考え続ける\"* -- デーモンスレッドがコマンド実行、完了後に通知を注入。\n\n## 問題\n\n一部のコマンドは数分かかる: `npm install`、`pytest`、`docker build`。ブロッキングループでは、モデルはサブプロセスの完了を待って座っている。ユーザーが「依存関係をインストールして、その間にconfigファイルを作って」と言っても、エージェントは並列ではなく逐次的に処理する。\n\n## 解決策\n\n```\nMain thread Background thread\n+-----------------+ +-----------------+\n| agent loop | | subprocess runs |\n| ... | | ... |\n| [LLM call] <---+------- | enqueue(result) |\n| ^drain queue | +-----------------+\n+-----------------+\n\nTimeline:\nAgent --[spawn A]--[spawn B]--[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- results injected before next LLM call --+\n```\n\n## 仕組み\n\n1. BackgroundManagerがスレッドセーフな通知キューでタスクを追跡する。\n\n```python\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {}\n self._notification_queue = []\n self._lock = threading.Lock()\n```\n\n2. `run()`がデーモンスレッドを開始し、即座にリターンする。\n\n```python\ndef run(self, command: str) -> str:\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True)\n thread.start()\n return f\"Background task {task_id} started\"\n```\n\n3. サブプロセス完了時に、結果を通知キューへ。\n\n```python\ndef _execute(self, task_id, command):\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300)\n output = (r.stdout + r.stderr).strip()[:50000]\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id, \"result\": output[:500]})\n```\n\n4. 
エージェントループが各LLM呼び出しの前に通知をドレインする。\n\n```python\ndef agent_loop(messages: list):\n while True:\n notifs = BG.drain_notifications()\n if notifs:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['result']}\" for n in notifs)\n messages.append({\"role\": \"user\",\n \"content\": f\"\\n{notif_text}\\n\"\n f\" \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted background results.\"})\n response = client.messages.create(...)\n```\n\nループはシングルスレッドのまま。サブプロセスI/Oだけが並列化される。\n\n## s07からの変更点\n\n| Component | Before (s07) | After (s08) |\n|----------------|------------------|----------------------------|\n| Tools | 8 | 6 (base + background_run + check)|\n| Execution | Blocking only | Blocking + background threads|\n| Notification | None | Queue drained per loop |\n| Concurrency | None | Daemon threads |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s08_background_tasks.py\n```\n\n1. `Run \"sleep 5 && echo done\" in the background, then create a file while it runs`\n2. `Start 3 background tasks: \"sleep 2\", \"sleep 4\", \"sleep 6\". Check their status.`\n3. `Run pytest in the background and keep working on other things`\n"
},
{
"version": "s09",
"locale": "ja",
"title": "s09: Agent Teams",
"content": "# s09: Agent Teams\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > [ s09 ] s10 > s11 > s12`\n\n> *\"一人で終わらないなら、チームメイトに任せる\"* -- 永続チームメイト + 非同期メールボックス。\n\n## 問題\n\nサブエージェント(s04)は使い捨てだ: 生成し、作業し、要約を返し、消滅する。アイデンティティもなく、呼び出し間の記憶もない。バックグラウンドタスク(s08)はシェルコマンドを実行するが、LLM誘導の意思決定はできない。\n\n本物のチームワークには: (1)単一プロンプトを超えて存続する永続エージェント、(2)アイデンティティとライフサイクル管理、(3)エージェント間の通信チャネルが必要だ。\n\n## 解決策\n\n```\nTeammate lifecycle:\n spawn -> WORKING -> IDLE -> WORKING -> ... -> SHUTDOWN\n\nCommunication:\n .team/\n config.json <- team roster + statuses\n inbox/\n alice.jsonl <- append-only, drain-on-read\n bob.jsonl\n lead.jsonl\n\n +--------+ send(\"alice\",\"bob\",\"...\") +--------+\n | alice | -----------------------------> | bob |\n | loop | bob.jsonl << {json_line} | loop |\n +--------+ +--------+\n ^ |\n | BUS.read_inbox(\"alice\") |\n +---- alice.jsonl -> read + drain ---------+\n```\n\n## 仕組み\n\n1. TeammateManagerがconfig.jsonでチーム名簿を管理する。\n\n```python\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n```\n\n2. `spawn()`がチームメイトを作成し、そのエージェントループをスレッドで開始する。\n\n```python\ndef spawn(self, name: str, role: str, prompt: str) -> str:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt), daemon=True)\n thread.start()\n return f\"Spawned teammate '{name}' (role: {role})\"\n```\n\n3. 
MessageBus: 追記専用のJSONLインボックス。`send()`がJSON行を追記し、`read_inbox()`がすべて読み取ってドレインする。\n\n```python\nclass MessageBus:\n def send(self, sender, to, content, msg_type=\"message\", extra=None):\n msg = {\"type\": msg_type, \"from\": sender,\n \"content\": content, \"timestamp\": time.time()}\n if extra:\n msg.update(extra)\n with open(self.dir / f\"{to}.jsonl\", \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n\n def read_inbox(self, name):\n path = self.dir / f\"{name}.jsonl\"\n if not path.exists(): return \"[]\"\n msgs = [json.loads(l) for l in path.read_text().strip().splitlines() if l]\n path.write_text(\"\") # drain\n return json.dumps(msgs, indent=2)\n```\n\n4. 各チームメイトは各LLM呼び出しの前にインボックスを確認し、受信メッセージをコンテキストに注入する。\n\n```python\ndef _teammate_loop(self, name, role, prompt):\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n messages.append({\"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\"})\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools, append results...\n self._find_member(name)[\"status\"] = \"idle\"\n```\n\n## s08からの変更点\n\n| Component | Before (s08) | After (s09) |\n|----------------|------------------|----------------------------|\n| Tools | 6 | 9 (+spawn/send/read_inbox) |\n| Agents | Single | Lead + N teammates |\n| Persistence | None | config.json + JSONL inboxes|\n| Threads | Background cmds | Full agent loops per thread|\n| Lifecycle | Fire-and-forget | idle -> working -> idle |\n| Communication | None | message + broadcast |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s09_agent_teams.py\n```\n\n1. `Spawn alice (coder) and bob (tester). Have alice send bob a message.`\n2. `Broadcast \"status update: phase 1 complete\" to all teammates`\n3. `Check the lead inbox for any messages`\n4. `/team`と入力してステータス付きのチーム名簿を確認する\n5. 
`/inbox`と入力してリーダーのインボックスを手動確認する\n"
},
{
"version": "s10",
"locale": "ja",
"title": "s10: Team Protocols",
"content": "# s10: Team Protocols\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > [ s10 ] s11 > s12`\n\n> *\"チームメイト間には統一の通信ルールが必要\"* -- 1つの request-response パターンが全交渉を駆動。\n\n## 問題\n\ns09ではチームメイトが作業し通信するが、構造化された協調がない:\n\n**シャットダウン**: スレッドを強制終了するとファイルが中途半端に書かれ、config.jsonが不正な状態になる。ハンドシェイクが必要 -- リーダーが要求し、チームメイトが承認(完了して退出)か拒否(作業継続)する。\n\n**プラン承認**: リーダーが「認証モジュールをリファクタリングして」と言うと、チームメイトは即座に開始する。リスクの高い変更では、実行前にリーダーが計画をレビューすべきだ。\n\n両方とも同じ構造: 一方がユニークIDを持つリクエストを送り、他方がそのIDで応答する。\n\n## 解決策\n\n```\nShutdown Protocol Plan Approval Protocol\n================== ======================\n\nLead Teammate Teammate Lead\n | | | |\n |--shutdown_req-->| |--plan_req------>|\n | {req_id:\"abc\"} | | {req_id:\"xyz\"} |\n | | | |\n |<--shutdown_resp-| |<--plan_resp-----|\n | {req_id:\"abc\", | | {req_id:\"xyz\", |\n | approve:true} | | approve:true} |\n\nShared FSM:\n [pending] --approve--> [approved]\n [pending] --reject---> [rejected]\n\nTrackers:\n shutdown_requests = {req_id: {target, status}}\n plan_requests = {req_id: {from, plan, status}}\n```\n\n## 仕組み\n\n1. リーダーがrequest_idを生成し、インボックス経由でシャットダウンを開始する。\n\n```python\nshutdown_requests = {}\n\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id})\n return f\"Shutdown request {req_id} sent (status: pending)\"\n```\n\n2. チームメイトがリクエストを受信し、承認または拒否で応答する。\n\n```python\nif tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\",\n {\"request_id\": req_id, \"approve\": approve})\n```\n\n3. 
プラン承認も同一パターン。チームメイトがプランを提出(request_idを生成)、リーダーがレビュー(同じrequest_idを参照)。\n\n```python\nplan_requests = {}\n\ndef handle_plan_review(request_id, approve, feedback=\"\"):\n req = plan_requests[request_id]\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\"lead\", req[\"from\"], feedback,\n \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve})\n```\n\n1つのFSM、2つの応用。同じ`pending -> approved | rejected`状態機械が、あらゆるリクエスト-レスポンスプロトコルに適用できる。\n\n## s09からの変更点\n\n| Component | Before (s09) | After (s10) |\n|----------------|------------------|------------------------------|\n| Tools | 9 | 12 (+shutdown_req/resp +plan)|\n| Shutdown | Natural exit only| Request-response handshake |\n| Plan gating | None | Submit/review with approval |\n| Correlation | None | request_id per request |\n| FSM | None | pending -> approved/rejected |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s10_team_protocols.py\n```\n\n1. `Spawn alice as a coder. Then request her shutdown.`\n2. `List teammates to see alice's status after shutdown approval`\n3. `Spawn bob with a risky refactoring task. Review and reject his plan.`\n4. `Spawn charlie, have him submit a plan, then approve it.`\n5. `/team`と入力してステータスを監視する\n"
},
{
"version": "s11",
"locale": "ja",
"title": "s11: Autonomous Agents",
"content": "# s11: Autonomous Agents\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > [ s11 ] s12`\n\n> *\"チームメイトが自らボードを見て、仕事を取る\"* -- リーダーが逐一割り振る必要はない。\n\n## 問題\n\ns09-s10では、チームメイトは明示的に指示された時のみ作業する。リーダーは各チームメイトを特定のプロンプトでspawnしなければならない。タスクボードに未割り当てのタスクが10個あっても、リーダーが手動で各タスクを割り当てる。これはスケールしない。\n\n真の自律性とは、チームメイトが自分で作業を見つけること: タスクボードをスキャンし、未確保のタスクを確保し、作業し、完了したら次を探す。\n\nもう1つの問題: コンテキスト圧縮(s06)後にエージェントが自分の正体を忘れる可能性がある。アイデンティティ再注入がこれを解決する。\n\n## 解決策\n\n```\nTeammate lifecycle with idle cycle:\n\n+-------+\n| spawn |\n+---+---+\n |\n v\n+-------+ tool_use +-------+\n| WORK | <------------- | LLM |\n+---+---+ +-------+\n |\n | stop_reason != tool_use (or idle tool called)\n v\n+--------+\n| IDLE | poll every 5s for up to 60s\n+---+----+\n |\n +---> check inbox --> message? ----------> WORK\n |\n +---> scan .tasks/ --> unclaimed? -------> claim -> WORK\n |\n +---> 60s timeout ----------------------> SHUTDOWN\n\nIdentity re-injection after compression:\n if len(messages) <= 3:\n messages.insert(0, identity_block)\n```\n\n## 仕組み\n\n1. チームメイトのループはWORKとIDLEの2フェーズ。LLMがツール呼び出しを止めた時(または`idle`ツールを呼んだ時)、IDLEフェーズに入る。\n\n```python\ndef _loop(self, name, role, prompt):\n while True:\n # -- WORK PHASE --\n messages = [{\"role\": \"user\", \"content\": prompt}]\n for _ in range(50):\n response = client.messages.create(...)\n if response.stop_reason != \"tool_use\":\n break\n # execute tools...\n if idle_requested:\n break\n\n # -- IDLE PHASE --\n self._set_status(name, \"idle\")\n resume = self._idle_poll(name, messages)\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n```\n\n2. 
IDLEフェーズがインボックスとタスクボードをポーリングする。\n\n```python\ndef _idle_poll(self, name, messages):\n for _ in range(IDLE_TIMEOUT // POLL_INTERVAL): # 60s / 5s = 12\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox != \"[]\":\n messages.append({\"role\": \"user\",\n \"content\": f\"{inbox} \"})\n return True\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n claim_task(unclaimed[0][\"id\"], name)\n messages.append({\"role\": \"user\",\n \"content\": f\"Task #{unclaimed[0]['id']}: \"\n f\"{unclaimed[0]['subject']} \"})\n return True\n return False # timeout -> shutdown\n```\n\n3. タスクボードスキャン: pendingかつ未割り当てかつブロックされていないタスクを探す。\n\n```python\ndef scan_unclaimed_tasks() -> list:\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n```\n\n4. アイデンティティ再注入: コンテキストが短すぎる(圧縮が起きた)場合にアイデンティティブロックを挿入する。\n\n```python\nif len(messages) <= 3:\n messages.insert(0, {\"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, \"\n f\"team: {team_name}. Continue your work. \"})\n messages.insert(1, {\"role\": \"assistant\",\n \"content\": f\"I am {name}. Continuing.\"})\n```\n\n## s10からの変更点\n\n| Component | Before (s10) | After (s11) |\n|----------------|------------------|----------------------------|\n| Tools | 12 | 14 (+idle, +claim_task) |\n| Autonomy | Lead-directed | Self-organizing |\n| Idle phase | None | Poll inbox + task board |\n| Task claiming | Manual only | Auto-claim unclaimed tasks |\n| Identity | System prompt | + re-injection after compress|\n| Timeout | None | 60s idle -> auto shutdown |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s11_autonomous_agents.py\n```\n\n1. `Create 3 tasks on the board, then spawn alice and bob. Watch them auto-claim.`\n2. `Spawn a coder teammate and let it find work from the task board itself`\n3. 
`Create tasks with dependencies. Watch teammates respect the blocked order.`\n4. `/tasks`と入力してオーナー付きのタスクボードを確認する\n5. `/team`と入力して誰が作業中でアイドルかを監視する\n"
},
{
"version": "s12",
"locale": "ja",
"title": "s12: Worktree + Task Isolation",
"content": "# s12: Worktree + Task Isolation\n\n`s01 > s02 > s03 > s04 > s05 > s06 | s07 > s08 > s09 > s10 > s11 > [ s12 ]`\n\n> *\"各自のディレクトリで作業し、互いに干渉しない\"* -- タスクは目標を管理、worktree はディレクトリを管理、IDで紐付け。\n\n## 問題\n\ns11までにエージェントはタスクを自律的に確保して完了できるようになった。しかし全タスクが1つの共有ディレクトリで走る。2つのエージェントが同時に異なるモジュールをリファクタリングすると衝突する: 片方が`config.py`を編集し、もう片方も`config.py`を編集し、未コミットの変更が混ざり合い、どちらもクリーンにロールバックできない。\n\nタスクボードは*何をやるか*を追跡するが、*どこでやるか*には関知しない。解決策: 各タスクに専用のgit worktreeディレクトリを与える。タスクが目標を管理し、worktreeが実行コンテキストを管理する。タスクIDで紐付ける。\n\n## 解決策\n\n```\nControl plane (.tasks/) Execution plane (.worktrees/)\n+------------------+ +------------------------+\n| task_1.json | | auth-refactor/ |\n| status: in_progress <------> branch: wt/auth-refactor\n| worktree: \"auth-refactor\" | task_id: 1 |\n+------------------+ +------------------------+\n| task_2.json | | ui-login/ |\n| status: pending <------> branch: wt/ui-login\n| worktree: \"ui-login\" | task_id: 2 |\n+------------------+ +------------------------+\n |\n index.json (worktree registry)\n events.jsonl (lifecycle log)\n\nState machines:\n Task: pending -> in_progress -> completed\n Worktree: absent -> active -> removed | kept\n```\n\n## 仕組み\n\n1. **タスクを作成する。** まず目標を永続化する。\n\n```python\nTASKS.create(\"Implement auth refactor\")\n# -> .tasks/task_1.json status=pending worktree=\"\"\n```\n\n2. **worktreeを作成してタスクに紐付ける。** `task_id`を渡すと、タスクが自動的に`in_progress`に遷移する。\n\n```python\nWORKTREES.create(\"auth-refactor\", task_id=1)\n# -> git worktree add -b wt/auth-refactor .worktrees/auth-refactor HEAD\n# -> index.json gets new entry, task_1.json gets worktree=\"auth-refactor\"\n```\n\n紐付けは両側に状態を書き込む:\n\n```python\ndef bind_worktree(self, task_id, worktree):\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n self._save(task)\n```\n\n3. 
**worktree内でコマンドを実行する。** `cwd`が分離ディレクトリを指す。\n\n```python\nsubprocess.run(command, shell=True, cwd=worktree_path,\n capture_output=True, text=True, timeout=300)\n```\n\n4. **終了処理。** 2つの選択肢:\n - `worktree_keep(name)` -- ディレクトリを保持する。\n - `worktree_remove(name, complete_task=True)` -- ディレクトリを削除し、紐付けられたタスクを完了し、イベントを発行する。1回の呼び出しで後片付けと完了を処理する。\n\n```python\ndef remove(self, name, force=False, complete_task=False):\n self._run_git([\"worktree\", \"remove\", wt[\"path\"]])\n if complete_task and wt.get(\"task_id\") is not None:\n self.tasks.update(wt[\"task_id\"], status=\"completed\")\n self.tasks.unbind_worktree(wt[\"task_id\"])\n self.events.emit(\"task.completed\", ...)\n```\n\n5. **イベントストリーム。** ライフサイクルの各ステップが`.worktrees/events.jsonl`に記録される:\n\n```json\n{\n \"event\": \"worktree.remove.after\",\n \"task\": {\"id\": 1, \"status\": \"completed\"},\n \"worktree\": {\"name\": \"auth-refactor\", \"status\": \"removed\"},\n \"ts\": 1730000000\n}\n```\n\n発行されるイベント: `worktree.create.before/after/failed`, `worktree.remove.before/after/failed`, `worktree.keep`, `task.completed`。\n\nクラッシュ後も`.tasks/` + `.worktrees/index.json`から状態を再構築できる。会話メモリは揮発性だが、ファイル状態は永続的だ。\n\n## s11からの変更点\n\n| Component | Before (s11) | After (s12) |\n|--------------------|----------------------------|----------------------------------------------|\n| Coordination | Task board (owner/status) | Task board + explicit worktree binding |\n| Execution scope | Shared directory | Task-scoped isolated directory |\n| Recoverability | Task status only | Task status + worktree index |\n| Teardown | Task completion | Task completion + explicit keep/remove |\n| Lifecycle visibility | Implicit in logs | Explicit events in `.worktrees/events.jsonl` |\n\n## 試してみる\n\n```sh\ncd learn-claude-code\npython agents/s12_worktree_task_isolation.py\n```\n\n1. `Create tasks for backend auth and frontend login page, then list tasks.`\n2. `Create worktree \"auth-refactor\" for task 1, then bind task 2 to a new worktree \"ui-login\".`\n3. 
`Run \"git status --short\" in worktree \"auth-refactor\".`\n4. `Keep worktree \"ui-login\", then list worktrees and inspect events.`\n5. `Remove worktree \"auth-refactor\" with complete_task=true, then list tasks/worktrees/events.`\n"
}
]
================================================
FILE: web/src/data/generated/versions.json
================================================
{
"versions": [
{
"id": "s01",
"filename": "s01_agent_loop.py",
"title": "The Agent Loop",
"subtitle": "Bash is All You Need",
"loc": 84,
"tools": [
"bash"
],
"newTools": [
"bash"
],
"coreAddition": "Single-tool agent loop",
"keyInsight": "The minimal agent kernel is a while loop + one tool",
"classes": [],
"functions": [
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
"startLine": 53
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 67
}
],
"layer": "tools",
"source": "#!/usr/bin/env python3\n\"\"\"\ns01_agent_loop.py - The Agent Loop\n\nThe entire secret of an AI coding agent in one pattern:\n\n while stop_reason == \"tool_use\":\n response = LLM(messages, tools)\n execute tools\n append results\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tool |\n | prompt | | | | execute |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n (loop continues)\n\nThis is the core loop: feed tool results back to the model\nuntil the model decides to stop. Production agents layer\npolicy, hooks, and lifecycle controls on top.\n\"\"\"\n\nimport os\nimport subprocess\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {os.getcwd()}. Use bash to solve tasks. 
Act, don't explain.\"\n\nTOOLS = [{\n \"name\": \"bash\",\n \"description\": \"Run a shell command.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n}]\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=os.getcwd(),\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\n# -- The core pattern: a while loop that calls tools until the model stops --\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n # Append assistant turn\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n # If the model didn't call a tool, we're done\n if response.stop_reason != \"tool_use\":\n return\n # Execute each tool call, collect results\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n print(f\"\\033[33m$ {block.input['command']}\\033[0m\")\n output = run_bash(block.input[\"command\"])\n print(output[:200])\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id,\n \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms01 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n response_content = history[-1][\"content\"]\n if isinstance(response_content, list):\n for block in 
response_content:\n if hasattr(block, \"text\"):\n print(block.text)\n print()\n"
},
{
"id": "s02",
"filename": "s02_tool_use.py",
"title": "Tools",
"subtitle": "One Handler Per Tool",
"loc": 115,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file"
],
"newTools": [
"read_file",
"write_file",
"edit_file"
],
"coreAddition": "Tool dispatch map",
"keyInsight": "The loop stays the same; new tools register into the dispatch map",
"classes": [],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
"startLine": 40
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
"startLine": 47
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
"startLine": 60
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
"startLine": 71
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
"startLine": 81
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 113
}
],
"layer": "tools",
"source": "#!/usr/bin/env python3\n\"\"\"\ns02_tool_use.py - Tools\n\nThe agent loop from s01 didn't change. We just added tools to the array\nand a dispatch map to route calls.\n\n +----------+ +-------+ +------------------+\n | User | ---> | LLM | ---> | Tool Dispatch |\n | prompt | | | | { |\n +----------+ +---+---+ | bash: run_bash |\n ^ | read: run_read |\n | | write: run_wr |\n +----------+ edit: run_edit |\n tool_result| } |\n +------------------+\n\nKey insight: \"The loop didn't change at all. I just added tools.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use tools to solve tasks. Act, don't explain.\"\n\n\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n text = safe_path(path).read_text()\n lines = text.splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more lines)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes to {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- The dispatch map: {tool_name: handler} --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\"> {block.name}: {output[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": output})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms02 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s03",
"filename": "s03_todo_write.py",
"title": "TodoWrite",
"subtitle": "Plan Before You Act",
"loc": 171,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file",
"todo"
],
"newTools": [
"todo"
],
"coreAddition": "TodoManager + nag reminder",
"keyInsight": "An agent without a plan drifts; list the steps first, then execute",
"classes": [
{
"name": "TodoManager",
"startLine": 51,
"endLine": 87
}
],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
"startLine": 92
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
"startLine": 98
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
"startLine": 110
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
"startLine": 119
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
"startLine": 128
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 163
}
],
"layer": "planning",
"source": "#!/usr/bin/env python3\n\"\"\"\ns03_todo_write.py - TodoWrite\n\nThe model tracks its own progress via a TodoManager. A nag reminder\nforces it to keep updating when it forgets.\n\n +----------+ +-------+ +---------+\n | User | ---> | LLM | ---> | Tools |\n | prompt | | | | + todo |\n +----------+ +---+---+ +----+----+\n ^ |\n | tool_result |\n +---------------+\n |\n +-----------+-----------+\n | TodoManager state |\n | [ ] task A |\n | [>] task B <- doing |\n | [x] task C |\n +-----------------------+\n |\n if rounds_since_todo >= 3:\n inject \n\nKey insight: \"The agent can track its own progress -- and I can see it.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse the todo tool to plan multi-step tasks. 
Mark in_progress before starting, completed when done.\nPrefer tools over prose.\"\"\"\n\n\n# -- TodoManager: structured state the LLM writes to --\nclass TodoManager:\n def __init__(self):\n self.items = []\n\n def update(self, items: list) -> str:\n if len(items) > 20:\n raise ValueError(\"Max 20 todos allowed\")\n validated = []\n in_progress_count = 0\n for i, item in enumerate(items):\n text = str(item.get(\"text\", \"\")).strip()\n status = str(item.get(\"status\", \"pending\")).lower()\n item_id = str(item.get(\"id\", str(i + 1)))\n if not text:\n raise ValueError(f\"Item {item_id}: text required\")\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Item {item_id}: invalid status '{status}'\")\n if status == \"in_progress\":\n in_progress_count += 1\n validated.append({\"id\": item_id, \"text\": text, \"status\": status})\n if in_progress_count > 1:\n raise ValueError(\"Only one task can be in_progress at a time\")\n self.items = validated\n return self.render()\n\n def render(self) -> str:\n if not self.items:\n return \"No todos.\"\n lines = []\n for item in self.items:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}[item[\"status\"]]\n lines.append(f\"{marker} #{item['id']}: {item['text']}\")\n done = sum(1 for t in self.items if t[\"status\"] == \"completed\")\n lines.append(f\"\\n({done}/{len(self.items)} completed)\")\n return \"\\n\".join(lines)\n\n\nTODO = TodoManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, 
timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"todo\": lambda **kw: TODO.update(kw[\"items\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": 
{\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"todo\", \"description\": \"Update task list. Track progress on multi-step tasks.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"items\": {\"type\": \"array\", \"items\": {\"type\": \"object\", \"properties\": {\"id\": {\"type\": \"string\"}, \"text\": {\"type\": \"string\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}}, \"required\": [\"id\", \"text\", \"status\"]}}}, \"required\": [\"items\"]}},\n]\n\n\n# -- Agent loop with nag reminder injection --\ndef agent_loop(messages: list):\n rounds_since_todo = 0\n while True:\n # Nag reminder is injected below, alongside tool results\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n used_todo = False\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n if block.name == \"todo\":\n used_todo = True\n rounds_since_todo = 0 if used_todo else rounds_since_todo + 1\n if rounds_since_todo >= 3:\n results.insert(0, {\"type\": \"text\", \"text\": \"Update your todos. 
\"})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms03 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s04",
"filename": "s04_subagent.py",
"title": "Subagents",
"subtitle": "Clean Context Per Subtask",
"loc": 146,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file",
"task"
],
"newTools": [
"task"
],
"coreAddition": "Subagent spawn with isolated messages[]",
"keyInsight": "Subagents use independent messages[], keeping the main conversation clean",
"classes": [],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
"startLine": 46
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
"startLine": 52
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
"startLine": 64
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
"startLine": 73
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
"startLine": 82
},
{
"name": "run_subagent",
"signature": "def run_subagent(prompt: str)",
"startLine": 115
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 143
}
],
"layer": "planning",
"source": "#!/usr/bin/env python3\n\"\"\"\ns04_subagent.py - Subagents\n\nSpawn a child agent with fresh messages=[]. The child works in its own\ncontext, sharing the filesystem, then returns only a summary to the parent.\n\n Parent agent Subagent\n +------------------+ +------------------+\n | messages=[...] | | messages=[] | <-- fresh\n | | dispatch | |\n | tool: task | ---------->| while tool_use: |\n | prompt=\"...\" | | call tools |\n | description=\"\" | | append results |\n | | summary | |\n | result = \"...\" | <--------- | return last text |\n +------------------+ +------------------+\n |\n Parent context stays clean.\n Subagent context is discarded.\n\nKey insight: \"Process isolation gives context isolation for free.\"\n\"\"\"\n\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. Use the task tool to delegate exploration or subtasks.\"\nSUBAGENT_SYSTEM = f\"You are a coding subagent at {WORKDIR}. 
Complete the given task, then summarize your findings.\"\n\n\n# -- Tool implementations shared by parent and child --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n}\n\n# Child gets all base tools except task (no 
recursive spawning)\nCHILD_TOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n]\n\n\n# -- Subagent: fresh context, filtered tools, summary-only return --\ndef run_subagent(prompt: str) -> str:\n sub_messages = [{\"role\": \"user\", \"content\": prompt}] # fresh context\n for _ in range(30): # safety limit\n response = client.messages.create(\n model=MODEL, system=SUBAGENT_SYSTEM, messages=sub_messages,\n tools=CHILD_TOOLS, max_tokens=8000,\n )\n sub_messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)[:50000]})\n sub_messages.append({\"role\": \"user\", \"content\": results})\n # Only the final text returns to the parent -- child context is discarded\n return \"\".join(b.text for 
b in response.content if hasattr(b, \"text\")) or \"(no summary)\"\n\n\n# -- Parent tools: base tools + task dispatcher --\nPARENT_TOOLS = CHILD_TOOLS + [\n {\"name\": \"task\", \"description\": \"Spawn a subagent with fresh context. It shares the filesystem but not conversation history.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"prompt\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\", \"description\": \"Short description of the task\"}}, \"required\": [\"prompt\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=PARENT_TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"task\":\n desc = block.input.get(\"description\", \"subtask\")\n print(f\"> task ({desc}): {block.input['prompt'][:80]}\")\n output = run_subagent(block.input[\"prompt\"])\n else:\n handler = TOOL_HANDLERS.get(block.name)\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n print(f\" {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms04 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s05",
"filename": "s05_skill_loading.py",
"title": "Skills",
"subtitle": "Load on Demand",
"loc": 182,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file",
"load_skill"
],
"newTools": [
"load_skill"
],
"coreAddition": "SkillLoader + two-layer injection",
"keyInsight": "Inject knowledge via tool_result when needed, not upfront in the system prompt",
"classes": [
{
"name": "SkillLoader",
"startLine": 57,
"endLine": 105
}
],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
"startLine": 117
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
"startLine": 123
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
"startLine": 135
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
"startLine": 144
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
"startLine": 153
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 187
}
],
"layer": "planning",
"source": "#!/usr/bin/env python3\n\"\"\"\ns05_skill_loading.py - Skills\n\nTwo-layer skill injection that avoids bloating the system prompt:\n\n Layer 1 (cheap): skill names in system prompt (~100 tokens/skill)\n Layer 2 (on demand): full skill body in tool_result\n\n skills/\n pdf/\n SKILL.md <-- frontmatter (name, description) + body\n code-review/\n SKILL.md\n\n System prompt:\n +--------------------------------------+\n | You are a coding agent. |\n | Skills available: |\n | - pdf: Process PDF files... | <-- Layer 1: metadata only\n | - code-review: Review code... |\n +--------------------------------------+\n\n When model calls load_skill(\"pdf\"):\n +--------------------------------------+\n | tool_result: |\n | |\n | Full PDF processing instructions | <-- Layer 2: full body\n | Step 1: ... |\n | Step 2: ... |\n | |\n +--------------------------------------+\n\nKey insight: \"Don't put everything in the system prompt. Load on demand.\"\n\"\"\"\n\nimport os\nimport re\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nSKILLS_DIR = WORKDIR / \"skills\"\n\n\n# -- SkillLoader: scan skills//SKILL.md with YAML frontmatter --\nclass SkillLoader:\n def __init__(self, skills_dir: Path):\n self.skills_dir = skills_dir\n self.skills = {}\n self._load_all()\n\n def _load_all(self):\n if not self.skills_dir.exists():\n return\n for f in sorted(self.skills_dir.rglob(\"SKILL.md\")):\n text = f.read_text()\n meta, body = self._parse_frontmatter(text)\n name = meta.get(\"name\", f.parent.name)\n self.skills[name] = {\"meta\": meta, \"body\": body, \"path\": str(f)}\n\n def _parse_frontmatter(self, text: str) -> tuple:\n \"\"\"Parse YAML frontmatter between --- 
delimiters.\"\"\"\n match = re.match(r\"^---\\n(.*?)\\n---\\n(.*)\", text, re.DOTALL)\n if not match:\n return {}, text\n meta = {}\n for line in match.group(1).strip().splitlines():\n if \":\" in line:\n key, val = line.split(\":\", 1)\n meta[key.strip()] = val.strip()\n return meta, match.group(2).strip()\n\n def get_descriptions(self) -> str:\n \"\"\"Layer 1: short descriptions for the system prompt.\"\"\"\n if not self.skills:\n return \"(no skills available)\"\n lines = []\n for name, skill in self.skills.items():\n desc = skill[\"meta\"].get(\"description\", \"No description\")\n tags = skill[\"meta\"].get(\"tags\", \"\")\n line = f\" - {name}: {desc}\"\n if tags:\n line += f\" [{tags}]\"\n lines.append(line)\n return \"\\n\".join(lines)\n\n def get_content(self, name: str) -> str:\n \"\"\"Layer 2: full skill body returned in tool_result.\"\"\"\n skill = self.skills.get(name)\n if not skill:\n return f\"Error: Unknown skill '{name}'. Available: {', '.join(self.skills.keys())}\"\n return f\"\\n{skill['body']}\\n \"\n\n\nSKILL_LOADER = SkillLoader(SKILLS_DIR)\n\n# Layer 1: skill metadata injected into system prompt\nSYSTEM = f\"\"\"You are a coding agent at {WORKDIR}.\nUse load_skill to access specialized knowledge before tackling unfamiliar topics.\n\nSkills available:\n{SKILL_LOADER.get_descriptions()}\"\"\"\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: 
Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"load_skill\": lambda **kw: SKILL_LOADER.get_content(kw[\"name\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", 
\"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"load_skill\", \"description\": \"Load specialized knowledge by name.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\", \"description\": \"Skill name to load\"}}, \"required\": [\"name\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms05 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s06",
"filename": "s06_context_compact.py",
"title": "Compact",
"subtitle": "Three-Layer Compression",
"loc": 200,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file",
"compact"
],
"newTools": [
"compact"
],
"coreAddition": "micro-compact + auto-compact + archival",
"keyInsight": "Context will fill up; three-layer compression strategy enables infinite sessions",
"classes": [],
"functions": [
{
"name": "estimate_tokens",
"signature": "def estimate_tokens(messages: list)",
"startLine": 61
},
{
"name": "micro_compact",
"signature": "def micro_compact(messages: list)",
"startLine": 67
},
{
"name": "auto_compact",
"signature": "def auto_compact(messages: list)",
"startLine": 97
},
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
"startLine": 124
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
"startLine": 130
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
"startLine": 142
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
"startLine": 151
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
"startLine": 160
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 194
}
],
"layer": "memory",
"source": "#!/usr/bin/env python3\n\"\"\"\ns06_context_compact.py - Compact\n\nThree-layer compression pipeline so the agent can work forever:\n\n Every turn:\n +------------------+\n | Tool call result |\n +------------------+\n |\n v\n [Layer 1: micro_compact] (silent, every turn)\n Replace tool_result content older than last 3\n with \"[Previous: used {tool_name}]\"\n |\n v\n [Check: tokens > 50000?]\n | |\n no yes\n | |\n v v\n continue [Layer 2: auto_compact]\n Save full transcript to .transcripts/\n Ask LLM to summarize conversation.\n Replace all messages with [summary].\n |\n v\n [Layer 3: compact tool]\n Model calls compact -> immediate summarization.\n Same as auto, triggered manually.\n\nKey insight: \"The agent can forget strategically and keep working forever.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use tools to solve tasks.\"\n\nTHRESHOLD = 50000\nTRANSCRIPT_DIR = WORKDIR / \".transcripts\"\nKEEP_RECENT = 3\n\n\ndef estimate_tokens(messages: list) -> int:\n \"\"\"Rough token count: ~4 chars per token.\"\"\"\n return len(str(messages)) // 4\n\n\n# -- Layer 1: micro_compact - replace old tool results with placeholders --\ndef micro_compact(messages: list) -> list:\n # Collect (msg_index, part_index, tool_result_dict) for all tool_result entries\n tool_results = []\n for msg_idx, msg in enumerate(messages):\n if msg[\"role\"] == \"user\" and isinstance(msg.get(\"content\"), list):\n for part_idx, part in enumerate(msg[\"content\"]):\n if isinstance(part, dict) and part.get(\"type\") == \"tool_result\":\n tool_results.append((msg_idx, part_idx, part))\n if len(tool_results) <= KEEP_RECENT:\n return messages\n # Find tool_name for each result by matching tool_use_id in prior assistant messages\n tool_name_map = {}\n for msg in messages:\n if msg[\"role\"] == \"assistant\":\n content = msg.get(\"content\", [])\n if isinstance(content, list):\n for block in content:\n if hasattr(block, \"type\") and block.type == \"tool_use\":\n tool_name_map[block.id] = block.name\n # Clear old results (keep last KEEP_RECENT)\n to_clear = tool_results[:-KEEP_RECENT]\n for _, _, result in to_clear:\n if isinstance(result.get(\"content\"), str) and len(result[\"content\"]) > 100:\n tool_id = result.get(\"tool_use_id\", \"\")\n tool_name = tool_name_map.get(tool_id, \"unknown\")\n result[\"content\"] = f\"[Previous: used {tool_name}]\"\n return messages\n\n\n# -- Layer 2: auto_compact - save transcript, summarize, replace messages --\ndef auto_compact(messages: list) -> list:\n # Save full transcript to disk\n TRANSCRIPT_DIR.mkdir(exist_ok=True)\n transcript_path = TRANSCRIPT_DIR / f\"transcript_{int(time.time())}.jsonl\"\n with open(transcript_path, \"w\") as f:\n for msg in messages:\n f.write(json.dumps(msg, default=str) + \"\\n\")\n print(f\"[transcript saved: 
{transcript_path}]\")\n # Ask LLM to summarize\n conversation_text = json.dumps(messages, default=str)[:80000]\n response = client.messages.create(\n model=MODEL,\n messages=[{\"role\": \"user\", \"content\":\n \"Summarize this conversation for continuity. Include: \"\n \"1) What was accomplished, 2) Current state, 3) Key decisions made. \"\n \"Be concise but preserve critical details.\\n\\n\" + conversation_text}],\n max_tokens=2000,\n )\n summary = response.content[0].text\n # Replace all messages with compressed summary\n return [\n {\"role\": \"user\", \"content\": f\"[Conversation compressed. Transcript: {transcript_path}]\\n\\n{summary}\"},\n {\"role\": \"assistant\", \"content\": \"Understood. I have the context from the summary. Continuing.\"},\n ]\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n content = fp.read_text()\n if old_text not in content:\n return f\"Error: Text not found in {path}\"\n fp.write_text(content.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"compact\": lambda **kw: \"Manual compression requested.\",\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": 
\"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"compact\", \"description\": \"Trigger manual conversation compression.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"focus\": {\"type\": \"string\", \"description\": \"What to preserve in the summary\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Layer 1: micro_compact before each LLM call\n micro_compact(messages)\n # Layer 2: auto_compact if token estimate exceeds threshold\n if estimate_tokens(messages) > THRESHOLD:\n print(\"[auto_compact triggered]\")\n messages[:] = auto_compact(messages)\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n manual_compact = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"compact\":\n manual_compact = True\n output = \"Compressing...\"\n else:\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n # Layer 3: manual compact triggered by the compact tool\n if manual_compact:\n print(\"[manual compact]\")\n messages[:] = auto_compact(messages)\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms06 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s07",
"filename": "s07_task_system.py",
"title": "Tasks",
"subtitle": "Task Graph + Dependencies",
"loc": 202,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file",
"task_create",
"task_update",
"task_list",
"task_get"
],
"newTools": [
"task_create",
"task_update",
"task_list",
"task_get"
],
"coreAddition": "TaskManager with file-based state + dependency graph",
"keyInsight": "A file-based task graph with ordering, parallelism, and dependencies -- the coordination backbone for multi-agent work",
"classes": [
{
"name": "TaskManager",
"startLine": 46,
"endLine": 125
}
],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
"startLine": 130
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
"startLine": 136
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
"startLine": 148
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
"startLine": 157
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
"startLine": 166
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 209
}
],
"layer": "planning",
"source": "#!/usr/bin/env python3\n\"\"\"\ns07_task_system.py - Tasks\n\nTasks persist as JSON files in .tasks/ so they survive context compression.\nEach task has a dependency graph (blockedBy/blocks).\n\n .tasks/\n task_1.json {\"id\":1, \"subject\":\"...\", \"status\":\"completed\", ...}\n task_2.json {\"id\":2, \"blockedBy\":[1], \"status\":\"pending\", ...}\n task_3.json {\"id\":3, \"blockedBy\":[2], \"blocks\":[], ...}\n\n Dependency resolution:\n +----------+ +----------+ +----------+\n | task 1 | --> | task 2 | --> | task 3 |\n | complete | | blocked | | blocked |\n +----------+ +----------+ +----------+\n | ^\n +--- completing task 1 removes it from task 2's blockedBy\n\nKey insight: \"State that survives compression -- because it's outside the conversation.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTASKS_DIR = WORKDIR / \".tasks\"\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use task tools to plan and track work.\"\n\n\n# -- TaskManager: CRUD with dependency graph, persisted as JSON files --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = [int(f.stem.split(\"_\")[1]) for f in self.dir.glob(\"task_*.json\")]\n return max(ids) if ids else 0\n\n def _load(self, task_id: int) -> dict:\n path = self.dir / f\"task_{task_id}.json\"\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n path = self.dir / f\"task_{task['id']}.json\"\n path.write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": self._next_id, \"subject\": subject, \"description\": description,\n \"status\": \"pending\", \"blockedBy\": [], \"blocks\": [], \"owner\": \"\",\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def update(self, task_id: int, status: str = None,\n add_blocked_by: list = None, add_blocks: list = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n # When a task is completed, remove it from all other tasks' blockedBy\n if status == \"completed\":\n self._clear_dependency(task_id)\n if add_blocked_by:\n task[\"blockedBy\"] = list(set(task[\"blockedBy\"] + add_blocked_by))\n if add_blocks:\n task[\"blocks\"] = list(set(task[\"blocks\"] + add_blocks))\n # Bidirectional: also update the blocked tasks' blockedBy lists\n for blocked_id in add_blocks:\n try:\n blocked = self._load(blocked_id)\n if task_id not in blocked[\"blockedBy\"]:\n blocked[\"blockedBy\"].append(task_id)\n 
self._save(blocked)\n except ValueError:\n pass\n self._save(task)\n return json.dumps(task, indent=2)\n\n def _clear_dependency(self, completed_id: int):\n \"\"\"Remove completed_id from all other tasks' blockedBy lists.\"\"\"\n for f in self.dir.glob(\"task_*.json\"):\n task = json.loads(f.read_text())\n if completed_id in task.get(\"blockedBy\", []):\n task[\"blockedBy\"].remove(completed_id)\n self._save(task)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n blocked = f\" (blocked by: {t['blockedBy']})\" if t.get(\"blockedBy\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{blocked}\")\n return \"\\n\".join(lines)\n\n\nTASKS = TaskManager(TASKS_DIR)\n\n\n# -- Base tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"addBlockedBy\"), kw.get(\"addBlocks\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", 
\"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"task_create\", \"description\": \"Create a new task.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"subject\": {\"type\": \"string\"}, \"description\": {\"type\": \"string\"}}, \"required\": [\"subject\"]}},\n {\"name\": \"task_update\", \"description\": \"Update a task's status or dependencies.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}, \"status\": {\"type\": \"string\", \"enum\": [\"pending\", \"in_progress\", \"completed\"]}, \"addBlockedBy\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}, \"addBlocks\": {\"type\": \"array\", \"items\": {\"type\": \"integer\"}}}, \"required\": [\"task_id\"]}},\n {\"name\": \"task_list\", \"description\": \"List all tasks with status summary.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"task_get\", \"description\": \"Get full details of a task by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: 
{str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms07 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s08",
"filename": "s08_background_tasks.py",
"title": "Background Tasks",
"subtitle": "Background Threads + Notifications",
"loc": 193,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file",
"background_run",
"check_background"
],
"newTools": [
"background_run",
"check_background"
],
"coreAddition": "BackgroundManager + notification queue",
"keyInsight": "Run slow operations in the background; the agent keeps thinking ahead",
"classes": [
{
"name": "BackgroundManager",
"startLine": 49,
"endLine": 109
}
],
"functions": [
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
"startLine": 114
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
"startLine": 120
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
"startLine": 132
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
"startLine": 141
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
"startLine": 150
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 187
}
],
"layer": "concurrency",
"source": "#!/usr/bin/env python3\n\"\"\"\ns08_background_tasks.py - Background Tasks\n\nRun commands in background threads. A notification queue is drained\nbefore each LLM call to deliver results.\n\n Main thread Background thread\n +-----------------+ +-----------------+\n | agent loop | | task executes |\n | ... | | ... |\n | [LLM call] <---+------- | enqueue(result) |\n | ^drain queue | +-----------------+\n +-----------------+\n\n Timeline:\n Agent ----[spawn A]----[spawn B]----[other work]----\n | |\n v v\n [A runs] [B runs] (parallel)\n | |\n +-- notification queue --> [results injected]\n\nKey insight: \"Fire and forget -- the agent doesn't block while the command runs.\"\n\"\"\"\n\nimport os\nimport subprocess\nimport threading\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\nSYSTEM = f\"You are a coding agent at {WORKDIR}. 
Use background_run for long-running commands.\"\n\n\n# -- BackgroundManager: threaded execution + notification queue --\nclass BackgroundManager:\n def __init__(self):\n self.tasks = {} # task_id -> {status, result, command}\n self._notification_queue = [] # completed task results\n self._lock = threading.Lock()\n\n def run(self, command: str) -> str:\n \"\"\"Start a background thread, return task_id immediately.\"\"\"\n task_id = str(uuid.uuid4())[:8]\n self.tasks[task_id] = {\"status\": \"running\", \"result\": None, \"command\": command}\n thread = threading.Thread(\n target=self._execute, args=(task_id, command), daemon=True\n )\n thread.start()\n return f\"Background task {task_id} started: {command[:80]}\"\n\n def _execute(self, task_id: str, command: str):\n \"\"\"Thread target: run subprocess, capture output, push to queue.\"\"\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=300\n )\n output = (r.stdout + r.stderr).strip()[:50000]\n status = \"completed\"\n except subprocess.TimeoutExpired:\n output = \"Error: Timeout (300s)\"\n status = \"timeout\"\n except Exception as e:\n output = f\"Error: {e}\"\n status = \"error\"\n self.tasks[task_id][\"status\"] = status\n self.tasks[task_id][\"result\"] = output or \"(no output)\"\n with self._lock:\n self._notification_queue.append({\n \"task_id\": task_id,\n \"status\": status,\n \"command\": command[:80],\n \"result\": (output or \"(no output)\")[:500],\n })\n\n def check(self, task_id: str = None) -> str:\n \"\"\"Check status of one task or list all.\"\"\"\n if task_id:\n t = self.tasks.get(task_id)\n if not t:\n return f\"Error: Unknown task {task_id}\"\n return f\"[{t['status']}] {t['command'][:60]}\\n{t.get('result') or '(running)'}\"\n lines = []\n for tid, t in self.tasks.items():\n lines.append(f\"{tid}: [{t['status']}] {t['command'][:60]}\")\n return \"\\n\".join(lines) if lines else \"No background tasks.\"\n\n def drain_notifications(self) 
-> list:\n \"\"\"Return and clear all pending completion notifications.\"\"\"\n with self._lock:\n notifs = list(self._notification_queue)\n self._notification_queue.clear()\n return notifs\n\n\nBG = BackgroundManager()\n\n\n# -- Tool implementations --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120)\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"background_run\": lambda **kw: BG.run(kw[\"command\"]),\n \"check_background\": lambda **kw: BG.check(kw.get(\"task_id\")),\n}\n\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command (blocking).\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": 
{\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"background_run\", \"description\": \"Run command in background thread. Returns task_id immediately.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"check_background\", \"description\": \"Check background task status. Omit task_id to list all.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"string\"}}}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n # Drain background notifications and inject as system message before LLM call\n notifs = BG.drain_notifications()\n if notifs and messages:\n notif_text = \"\\n\".join(\n f\"[bg:{n['task_id']}] {n['status']}: {n['result']}\" for n in notifs\n )\n messages.append({\"role\": \"user\", \"content\": f\"\\n{notif_text}\\n \"})\n messages.append({\"role\": \"assistant\", \"content\": \"Noted background results.\"})\n response = client.messages.create(\n model=MODEL, system=SYSTEM, messages=messages,\n tools=TOOLS, max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\"type\": \"tool_result\", \"tool_use_id\": block.id, \"content\": str(output)})\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms08 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n 
break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s09",
"filename": "s09_agent_teams.py",
"title": "Agent Teams",
"subtitle": "Teammates + Mailboxes",
"loc": 343,
"tools": [
"alice",
"bash",
"read_file",
"write_file",
"edit_file",
"send_message",
"read_inbox",
"spawn_teammate",
"list_teammates",
"broadcast"
],
"newTools": [
"alice",
"send_message",
"read_inbox",
"spawn_teammate",
"list_teammates",
"broadcast"
],
"coreAddition": "TeammateManager + file-based mailbox",
"keyInsight": "When one agent can't finish, delegate to persistent teammates via async mailboxes",
"classes": [
{
"name": "MessageBus",
"startLine": 77,
"endLine": 118
},
{
"name": "TeammateManager",
"startLine": 123,
"endLine": 249
}
],
"functions": [
{
"name": "_safe_path",
"signature": "def _safe_path(p: str)",
"startLine": 254
},
{
"name": "_run_bash",
"signature": "def _run_bash(command: str)",
"startLine": 261
},
{
"name": "_run_read",
"signature": "def _run_read(path: str, limit: int = None)",
"startLine": 276
},
{
"name": "_run_write",
"signature": "def _run_write(path: str, content: str)",
"startLine": 286
},
{
"name": "_run_edit",
"signature": "def _run_edit(path: str, old_text: str, new_text: str)",
"startLine": 296
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 344
}
],
"layer": "collaboration",
"source": "#!/usr/bin/env python3\n\"\"\"\ns09_agent_teams.py - Agent Teams\n\nPersistent named agents with file-based JSONL inboxes. Each teammate runs\nits own agent loop in a separate thread. Communication via append-only inboxes.\n\n Subagent (s04): spawn -> execute -> return summary -> destroyed\n Teammate (s09): spawn -> work -> idle -> work -> ... -> shutdown\n\n .team/config.json .team/inbox/\n +----------------------------+ +------------------+\n | {\"team_name\": \"default\", | | alice.jsonl |\n | \"members\": [ | | bob.jsonl |\n | {\"name\":\"alice\", | | lead.jsonl |\n | \"role\":\"coder\", | +------------------+\n | \"status\":\"idle\"} |\n | ]} | send_message(\"alice\", \"fix bug\"):\n +----------------------------+ open(\"alice.jsonl\", \"a\").write(msg)\n\n read_inbox(\"alice\"):\n spawn_teammate(\"alice\",\"coder\",...) msgs = [json.loads(l) for l in ...]\n | open(\"alice.jsonl\", \"w\").close()\n v return msgs # drain\n Thread: alice Thread: bob\n +------------------+ +------------------+\n | agent_loop | | agent_loop |\n | status: working | | status: idle |\n | ... runs tools | | ... waits ... 
|\n | status -> idle | | |\n +------------------+ +------------------+\n\n 5 message types (all declared, not all handled here):\n +-------------------------+-----------------------------------+\n | message | Normal text message |\n | broadcast | Sent to all teammates |\n | shutdown_request | Request graceful shutdown (s10) |\n | shutdown_response | Approve/reject shutdown (s10) |\n | plan_approval_response | Approve/reject plan (s10) |\n +-------------------------+-----------------------------------+\n\nKey insight: \"Teammates that can talk to each other.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Spawn teammates and communicate via inboxes.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager: persistent named agents with config.json --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n 
self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Use send_message to communicate. Complete your task.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member and member[\"status\"] != \"shutdown\":\n member[\"status\"] = \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, 
args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) 
-> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead tool dispatch (9 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": 
\"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent teammate that runs in its own thread.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates with name, role, status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = 
client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms09 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s10",
"filename": "s10_team_protocols.py",
"title": "Team Protocols",
"subtitle": "Shared Communication Rules",
"loc": 414,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file",
"send_message",
"read_inbox",
"shutdown_response",
"plan_approval",
"spawn_teammate",
"list_teammates",
"broadcast",
"shutdown_request"
],
"newTools": [
"shutdown_response",
"plan_approval",
"shutdown_request"
],
"coreAddition": "request_id correlation for two protocols",
"keyInsight": "One request-response pattern drives all team negotiation",
"classes": [
{
"name": "MessageBus",
"startLine": 87,
"endLine": 128
},
{
"name": "TeammateManager",
"startLine": 133,
"endLine": 290
}
],
"functions": [
{
"name": "_safe_path",
"signature": "def _safe_path(p: str)",
"startLine": 295
},
{
"name": "_run_bash",
"signature": "def _run_bash(command: str)",
"startLine": 302
},
{
"name": "_run_read",
"signature": "def _run_read(path: str, limit: int = None)",
"startLine": 317
},
{
"name": "_run_write",
"signature": "def _run_write(path: str, content: str)",
"startLine": 327
},
{
"name": "_run_edit",
"signature": "def _run_edit(path: str, old_text: str, new_text: str)",
"startLine": 337
},
{
"name": "handle_shutdown_request",
"signature": "def handle_shutdown_request(teammate: str)",
"startLine": 350
},
{
"name": "handle_plan_review",
"signature": "def handle_plan_review(request_id: str, approve: bool, feedback: str = \"\")",
"startLine": 361
},
{
"name": "_check_shutdown_status",
"signature": "def _check_shutdown_status(request_id: str)",
"startLine": 375
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 425
}
],
"layer": "collaboration",
"source": "#!/usr/bin/env python3\n\"\"\"\ns10_team_protocols.py - Team Protocols\n\nShutdown protocol and plan approval protocol, both using the same\nrequest_id correlation pattern. Builds on s09's team messaging.\n\n Shutdown FSM: pending -> approved | rejected\n\n Lead Teammate\n +---------------------+ +---------------------+\n | shutdown_request | | |\n | { | -------> | receives request |\n | request_id: abc | | decides: approve? |\n | } | | |\n +---------------------+ +---------------------+\n |\n +---------------------+ +-------v-------------+\n | shutdown_response | <------- | shutdown_response |\n | { | | { |\n | request_id: abc | | request_id: abc |\n | approve: true | | approve: true |\n | } | | } |\n +---------------------+ +---------------------+\n |\n v\n status -> \"shutdown\", thread stops\n\n Plan approval FSM: pending -> approved | rejected\n\n Teammate Lead\n +---------------------+ +---------------------+\n | plan_approval | | |\n | submit: {plan:\"...\"}| -------> | reviews plan text |\n +---------------------+ | approve/reject? 
|\n +---------------------+\n |\n +---------------------+ +-------v-------------+\n | plan_approval_resp | <------- | plan_approval |\n | {approve: true} | | review: {req_id, |\n +---------------------+ | approve: true} |\n +---------------------+\n\n Trackers: {request_id: {\"target|from\": name, \"status\": \"pending|...\"}}\n\nKey insight: \"Same request_id correlation pattern, two domains.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. Manage teammates with shutdown and plan approval protocols.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers: correlate by request_id --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. 
Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- TeammateManager with shutdown + plan approval --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n 
self._save_config()\n thread = threading.Thread(\n target=self._teammate_loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n return f\"Spawned '{name}' (role: {role})\"\n\n def _teammate_loop(self, name: str, role: str, prompt: str):\n sys_prompt = (\n f\"You are '{name}', role: {role}, at {WORKDIR}. \"\n f\"Submit plans via plan_approval before major work. \"\n f\"Respond to shutdown_request with shutdown_response.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n should_exit = False\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n if should_exit:\n break\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n break\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n if block.name == \"shutdown_response\" and block.input.get(\"approve\"):\n should_exit = True\n messages.append({\"role\": \"user\", \"content\": results})\n member = self._find_member(name)\n if member:\n member[\"status\"] = \"shutdown\" if should_exit else \"idle\"\n self._save_config()\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name 
== \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n approve = args[\"approve\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": approve},\n )\n return f\"Shutdown {'approved' if approve else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for lead approval.\"\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request. 
Approve to shut down, reject to keep working.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Submit a plan for lead approval. Provide plan text.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}' (status: pending)\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (12 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n 
\"write_file\": lambda **kw: _run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn a persistent 
teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down gracefully. Returns a request_id for tracking.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check the status of a shutdown request by request_id.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan. 
Provide request_id + approve + optional feedback.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms10 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s11",
"filename": "s11_autonomous_agents.py",
"title": "Autonomous Agents",
"subtitle": "Scan Board, Claim Tasks",
"loc": 494,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file",
"send_message",
"read_inbox",
"shutdown_response",
"plan_approval",
"idle",
"claim_task",
"spawn_teammate",
"list_teammates",
"broadcast",
"shutdown_request"
],
"newTools": [
"idle",
"claim_task"
],
"coreAddition": "Task board polling + timeout-based self-governance",
"keyInsight": "Teammates scan the board and claim tasks themselves; no need for the lead to assign each one",
"classes": [
{
"name": "MessageBus",
"startLine": 80,
"endLine": 121
},
{
"name": "TeammateManager",
"startLine": 159,
"endLine": 368
}
],
"functions": [
{
"name": "scan_unclaimed_tasks",
"signature": "def scan_unclaimed_tasks()",
"startLine": 126
},
{
"name": "claim_task",
"signature": "def claim_task(task_id: int, owner: str)",
"startLine": 138
},
{
"name": "make_identity_block",
"signature": "def make_identity_block(name: str, role: str, team_name: str)",
"startLine": 151
},
{
"name": "_safe_path",
"signature": "def _safe_path(p: str)",
"startLine": 373
},
{
"name": "_run_bash",
"signature": "def _run_bash(command: str)",
"startLine": 380
},
{
"name": "_run_read",
"signature": "def _run_read(path: str, limit: int = None)",
"startLine": 395
},
{
"name": "_run_write",
"signature": "def _run_write(path: str, content: str)",
"startLine": 405
},
{
"name": "_run_edit",
"signature": "def _run_edit(path: str, old_text: str, new_text: str)",
"startLine": 415
},
{
"name": "handle_shutdown_request",
"signature": "def handle_shutdown_request(teammate: str)",
"startLine": 428
},
{
"name": "handle_plan_review",
"signature": "def handle_plan_review(request_id: str, approve: bool, feedback: str = \"\")",
"startLine": 439
},
{
"name": "_check_shutdown_status",
"signature": "def _check_shutdown_status(request_id: str)",
"startLine": 453
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 509
}
],
"layer": "collaboration",
"source": "#!/usr/bin/env python3\n\"\"\"\ns11_autonomous_agents.py - Autonomous Agents\n\nIdle cycle with task board polling, auto-claiming unclaimed tasks, and\nidentity re-injection after context compression. Builds on s10's protocols.\n\n Teammate lifecycle:\n +-------+\n | spawn |\n +---+---+\n |\n v\n +-------+ tool_use +-------+\n | WORK | <----------- | LLM |\n +---+---+ +-------+\n |\n | stop_reason != tool_use\n v\n +--------+\n | IDLE | poll every 5s for up to 60s\n +---+----+\n |\n +---> check inbox -> message? -> resume WORK\n |\n +---> scan .tasks/ -> unclaimed? -> claim -> resume WORK\n |\n +---> timeout (60s) -> shutdown\n\n Identity re-injection after compression:\n messages = [identity_block, ...remaining...]\n \"You are 'coder', role: backend, team: my-team\"\n\nKey insight: \"The agent finds work itself.\"\n\"\"\"\n\nimport json\nimport os\nimport subprocess\nimport threading\nimport time\nimport uuid\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\nTEAM_DIR = WORKDIR / \".team\"\nINBOX_DIR = TEAM_DIR / \"inbox\"\nTASKS_DIR = WORKDIR / \".tasks\"\n\nPOLL_INTERVAL = 5\nIDLE_TIMEOUT = 60\n\nSYSTEM = f\"You are a team lead at {WORKDIR}. 
Teammates are autonomous -- they find work themselves.\"\n\nVALID_MSG_TYPES = {\n \"message\",\n \"broadcast\",\n \"shutdown_request\",\n \"shutdown_response\",\n \"plan_approval_response\",\n}\n\n# -- Request trackers --\nshutdown_requests = {}\nplan_requests = {}\n_tracker_lock = threading.Lock()\n_claim_lock = threading.Lock()\n\n\n# -- MessageBus: JSONL inbox per teammate --\nclass MessageBus:\n def __init__(self, inbox_dir: Path):\n self.dir = inbox_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n\n def send(self, sender: str, to: str, content: str,\n msg_type: str = \"message\", extra: dict = None) -> str:\n if msg_type not in VALID_MSG_TYPES:\n return f\"Error: Invalid type '{msg_type}'. Valid: {VALID_MSG_TYPES}\"\n msg = {\n \"type\": msg_type,\n \"from\": sender,\n \"content\": content,\n \"timestamp\": time.time(),\n }\n if extra:\n msg.update(extra)\n inbox_path = self.dir / f\"{to}.jsonl\"\n with open(inbox_path, \"a\") as f:\n f.write(json.dumps(msg) + \"\\n\")\n return f\"Sent {msg_type} to {to}\"\n\n def read_inbox(self, name: str) -> list:\n inbox_path = self.dir / f\"{name}.jsonl\"\n if not inbox_path.exists():\n return []\n messages = []\n for line in inbox_path.read_text().strip().splitlines():\n if line:\n messages.append(json.loads(line))\n inbox_path.write_text(\"\")\n return messages\n\n def broadcast(self, sender: str, content: str, teammates: list) -> str:\n count = 0\n for name in teammates:\n if name != sender:\n self.send(sender, name, content, \"broadcast\")\n count += 1\n return f\"Broadcast to {count} teammates\"\n\n\nBUS = MessageBus(INBOX_DIR)\n\n\n# -- Task board scanning --\ndef scan_unclaimed_tasks() -> list:\n TASKS_DIR.mkdir(exist_ok=True)\n unclaimed = []\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n task = json.loads(f.read_text())\n if (task.get(\"status\") == \"pending\"\n and not task.get(\"owner\")\n and not task.get(\"blockedBy\")):\n unclaimed.append(task)\n return unclaimed\n\n\ndef claim_task(task_id: int, 
owner: str) -> str:\n with _claim_lock:\n path = TASKS_DIR / f\"task_{task_id}.json\"\n if not path.exists():\n return f\"Error: Task {task_id} not found\"\n task = json.loads(path.read_text())\n task[\"owner\"] = owner\n task[\"status\"] = \"in_progress\"\n path.write_text(json.dumps(task, indent=2))\n return f\"Claimed task #{task_id} for {owner}\"\n\n\n# -- Identity re-injection after compression --\ndef make_identity_block(name: str, role: str, team_name: str) -> dict:\n return {\n \"role\": \"user\",\n \"content\": f\"You are '{name}', role: {role}, team: {team_name}. Continue your work. \",\n }\n\n\n# -- Autonomous TeammateManager --\nclass TeammateManager:\n def __init__(self, team_dir: Path):\n self.dir = team_dir\n self.dir.mkdir(exist_ok=True)\n self.config_path = self.dir / \"config.json\"\n self.config = self._load_config()\n self.threads = {}\n\n def _load_config(self) -> dict:\n if self.config_path.exists():\n return json.loads(self.config_path.read_text())\n return {\"team_name\": \"default\", \"members\": []}\n\n def _save_config(self):\n self.config_path.write_text(json.dumps(self.config, indent=2))\n\n def _find_member(self, name: str) -> dict:\n for m in self.config[\"members\"]:\n if m[\"name\"] == name:\n return m\n return None\n\n def _set_status(self, name: str, status: str):\n member = self._find_member(name)\n if member:\n member[\"status\"] = status\n self._save_config()\n\n def spawn(self, name: str, role: str, prompt: str) -> str:\n member = self._find_member(name)\n if member:\n if member[\"status\"] not in (\"idle\", \"shutdown\"):\n return f\"Error: '{name}' is currently {member['status']}\"\n member[\"status\"] = \"working\"\n member[\"role\"] = role\n else:\n member = {\"name\": name, \"role\": role, \"status\": \"working\"}\n self.config[\"members\"].append(member)\n self._save_config()\n thread = threading.Thread(\n target=self._loop,\n args=(name, role, prompt),\n daemon=True,\n )\n self.threads[name] = thread\n thread.start()\n 
return f\"Spawned '{name}' (role: {role})\"\n\n def _loop(self, name: str, role: str, prompt: str):\n team_name = self.config[\"team_name\"]\n sys_prompt = (\n f\"You are '{name}', role: {role}, team: {team_name}, at {WORKDIR}. \"\n f\"Use idle tool when you have no more work. You will auto-claim new tasks.\"\n )\n messages = [{\"role\": \"user\", \"content\": prompt}]\n tools = self._teammate_tools()\n\n while True:\n # -- WORK PHASE: standard agent loop --\n for _ in range(50):\n inbox = BUS.read_inbox(name)\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n try:\n response = client.messages.create(\n model=MODEL,\n system=sys_prompt,\n messages=messages,\n tools=tools,\n max_tokens=8000,\n )\n except Exception:\n self._set_status(name, \"idle\")\n return\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n break\n results = []\n idle_requested = False\n for block in response.content:\n if block.type == \"tool_use\":\n if block.name == \"idle\":\n idle_requested = True\n output = \"Entering idle phase. 
Will poll for new tasks.\"\n else:\n output = self._exec(name, block.name, block.input)\n print(f\" [{name}] {block.name}: {str(output)[:120]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n if idle_requested:\n break\n\n # -- IDLE PHASE: poll for inbox messages and unclaimed tasks --\n self._set_status(name, \"idle\")\n resume = False\n polls = IDLE_TIMEOUT // max(POLL_INTERVAL, 1)\n for _ in range(polls):\n time.sleep(POLL_INTERVAL)\n inbox = BUS.read_inbox(name)\n if inbox:\n for msg in inbox:\n if msg.get(\"type\") == \"shutdown_request\":\n self._set_status(name, \"shutdown\")\n return\n messages.append({\"role\": \"user\", \"content\": json.dumps(msg)})\n resume = True\n break\n unclaimed = scan_unclaimed_tasks()\n if unclaimed:\n task = unclaimed[0]\n claim_task(task[\"id\"], name)\n task_prompt = (\n f\"Task #{task['id']}: {task['subject']}\\n\"\n f\"{task.get('description', '')} \"\n )\n if len(messages) <= 3:\n messages.insert(0, make_identity_block(name, role, team_name))\n messages.insert(1, {\"role\": \"assistant\", \"content\": f\"I am {name}. Continuing.\"})\n messages.append({\"role\": \"user\", \"content\": task_prompt})\n messages.append({\"role\": \"assistant\", \"content\": f\"Claimed task #{task['id']}. 
Working on it.\"})\n resume = True\n break\n\n if not resume:\n self._set_status(name, \"shutdown\")\n return\n self._set_status(name, \"working\")\n\n def _exec(self, sender: str, tool_name: str, args: dict) -> str:\n # these base tools are unchanged from s02\n if tool_name == \"bash\":\n return _run_bash(args[\"command\"])\n if tool_name == \"read_file\":\n return _run_read(args[\"path\"])\n if tool_name == \"write_file\":\n return _run_write(args[\"path\"], args[\"content\"])\n if tool_name == \"edit_file\":\n return _run_edit(args[\"path\"], args[\"old_text\"], args[\"new_text\"])\n if tool_name == \"send_message\":\n return BUS.send(sender, args[\"to\"], args[\"content\"], args.get(\"msg_type\", \"message\"))\n if tool_name == \"read_inbox\":\n return json.dumps(BUS.read_inbox(sender), indent=2)\n if tool_name == \"shutdown_response\":\n req_id = args[\"request_id\"]\n with _tracker_lock:\n if req_id in shutdown_requests:\n shutdown_requests[req_id][\"status\"] = \"approved\" if args[\"approve\"] else \"rejected\"\n BUS.send(\n sender, \"lead\", args.get(\"reason\", \"\"),\n \"shutdown_response\", {\"request_id\": req_id, \"approve\": args[\"approve\"]},\n )\n return f\"Shutdown {'approved' if args['approve'] else 'rejected'}\"\n if tool_name == \"plan_approval\":\n plan_text = args.get(\"plan\", \"\")\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n plan_requests[req_id] = {\"from\": sender, \"plan\": plan_text, \"status\": \"pending\"}\n BUS.send(\n sender, \"lead\", plan_text, \"plan_approval_response\",\n {\"request_id\": req_id, \"plan\": plan_text},\n )\n return f\"Plan submitted (request_id={req_id}). 
Waiting for approval.\"\n if tool_name == \"claim_task\":\n return claim_task(args[\"task_id\"], sender)\n return f\"Unknown tool: {tool_name}\"\n\n def _teammate_tools(self) -> list:\n # these base tools are unchanged from s02\n return [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", \"new_text\"]}},\n {\"name\": \"send_message\", \"description\": \"Send message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain your inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"shutdown_response\", \"description\": \"Respond to a shutdown request.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"reason\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"plan_approval\", 
\"description\": \"Submit a plan for lead approval.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"plan\": {\"type\": \"string\"}}, \"required\": [\"plan\"]}},\n {\"name\": \"idle\", \"description\": \"Signal that you have no more work. Enters idle polling phase.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the task board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n ]\n\n def list_all(self) -> str:\n if not self.config[\"members\"]:\n return \"No teammates.\"\n lines = [f\"Team: {self.config['team_name']}\"]\n for m in self.config[\"members\"]:\n lines.append(f\" {m['name']} ({m['role']}): {m['status']}\")\n return \"\\n\".join(lines)\n\n def member_names(self) -> list:\n return [m[\"name\"] for m in self.config[\"members\"]]\n\n\nTEAM = TeammateManager(TEAM_DIR)\n\n\n# -- Base tool implementations (these base tools are unchanged from s02) --\ndef _safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef _run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command, shell=True, cwd=WORKDIR,\n capture_output=True, text=True, timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef _run_read(path: str, limit: int = None) -> str:\n try:\n lines = _safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... 
({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_write(path: str, content: str) -> str:\n try:\n fp = _safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef _run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = _safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\n# -- Lead-specific protocol handlers --\ndef handle_shutdown_request(teammate: str) -> str:\n req_id = str(uuid.uuid4())[:8]\n with _tracker_lock:\n shutdown_requests[req_id] = {\"target\": teammate, \"status\": \"pending\"}\n BUS.send(\n \"lead\", teammate, \"Please shut down gracefully.\",\n \"shutdown_request\", {\"request_id\": req_id},\n )\n return f\"Shutdown request {req_id} sent to '{teammate}'\"\n\n\ndef handle_plan_review(request_id: str, approve: bool, feedback: str = \"\") -> str:\n with _tracker_lock:\n req = plan_requests.get(request_id)\n if not req:\n return f\"Error: Unknown plan request_id '{request_id}'\"\n with _tracker_lock:\n req[\"status\"] = \"approved\" if approve else \"rejected\"\n BUS.send(\n \"lead\", req[\"from\"], feedback, \"plan_approval_response\",\n {\"request_id\": request_id, \"approve\": approve, \"feedback\": feedback},\n )\n return f\"Plan {req['status']} for '{req['from']}'\"\n\n\ndef _check_shutdown_status(request_id: str) -> str:\n with _tracker_lock:\n return json.dumps(shutdown_requests.get(request_id, {\"error\": \"not found\"}))\n\n\n# -- Lead tool dispatch (14 tools) --\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: _run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: _run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: 
_run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: _run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"spawn_teammate\": lambda **kw: TEAM.spawn(kw[\"name\"], kw[\"role\"], kw[\"prompt\"]),\n \"list_teammates\": lambda **kw: TEAM.list_all(),\n \"send_message\": lambda **kw: BUS.send(\"lead\", kw[\"to\"], kw[\"content\"], kw.get(\"msg_type\", \"message\")),\n \"read_inbox\": lambda **kw: json.dumps(BUS.read_inbox(\"lead\"), indent=2),\n \"broadcast\": lambda **kw: BUS.broadcast(\"lead\", kw[\"content\"], TEAM.member_names()),\n \"shutdown_request\": lambda **kw: handle_shutdown_request(kw[\"teammate\"]),\n \"shutdown_response\": lambda **kw: _check_shutdown_status(kw.get(\"request_id\", \"\")),\n \"plan_approval\": lambda **kw: handle_plan_review(kw[\"request_id\"], kw[\"approve\"], kw.get(\"feedback\", \"\")),\n \"idle\": lambda **kw: \"Lead does not idle.\",\n \"claim_task\": lambda **kw: claim_task(kw[\"task_id\"], \"lead\"),\n}\n\n# these base tools are unchanged from s02\nTOOLS = [\n {\"name\": \"bash\", \"description\": \"Run a shell command.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"command\": {\"type\": \"string\"}}, \"required\": [\"command\"]}},\n {\"name\": \"read_file\", \"description\": \"Read file contents.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"limit\": {\"type\": \"integer\"}}, \"required\": [\"path\"]}},\n {\"name\": \"write_file\", \"description\": \"Write content to file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}}, \"required\": [\"path\", \"content\"]}},\n {\"name\": \"edit_file\", \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"path\": {\"type\": \"string\"}, \"old_text\": {\"type\": \"string\"}, \"new_text\": {\"type\": \"string\"}}, \"required\": [\"path\", \"old_text\", 
\"new_text\"]}},\n {\"name\": \"spawn_teammate\", \"description\": \"Spawn an autonomous teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"name\": {\"type\": \"string\"}, \"role\": {\"type\": \"string\"}, \"prompt\": {\"type\": \"string\"}}, \"required\": [\"name\", \"role\", \"prompt\"]}},\n {\"name\": \"list_teammates\", \"description\": \"List all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"send_message\", \"description\": \"Send a message to a teammate.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"to\": {\"type\": \"string\"}, \"content\": {\"type\": \"string\"}, \"msg_type\": {\"type\": \"string\", \"enum\": list(VALID_MSG_TYPES)}}, \"required\": [\"to\", \"content\"]}},\n {\"name\": \"read_inbox\", \"description\": \"Read and drain the lead's inbox.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"broadcast\", \"description\": \"Send a message to all teammates.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"content\": {\"type\": \"string\"}}, \"required\": [\"content\"]}},\n {\"name\": \"shutdown_request\", \"description\": \"Request a teammate to shut down.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"teammate\": {\"type\": \"string\"}}, \"required\": [\"teammate\"]}},\n {\"name\": \"shutdown_response\", \"description\": \"Check shutdown request status.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}}, \"required\": [\"request_id\"]}},\n {\"name\": \"plan_approval\", \"description\": \"Approve or reject a teammate's plan.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"request_id\": {\"type\": \"string\"}, \"approve\": {\"type\": \"boolean\"}, \"feedback\": {\"type\": \"string\"}}, \"required\": [\"request_id\", \"approve\"]}},\n {\"name\": \"idle\", \"description\": \"Enter idle state (for lead -- rarely used).\",\n 
\"input_schema\": {\"type\": \"object\", \"properties\": {}}},\n {\"name\": \"claim_task\", \"description\": \"Claim a task from the board by ID.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {\"task_id\": {\"type\": \"integer\"}}, \"required\": [\"task_id\"]}},\n]\n\n\ndef agent_loop(messages: list):\n while True:\n inbox = BUS.read_inbox(\"lead\")\n if inbox:\n messages.append({\n \"role\": \"user\",\n \"content\": f\"{json.dumps(inbox, indent=2)} \",\n })\n messages.append({\n \"role\": \"assistant\",\n \"content\": \"Noted inbox messages.\",\n })\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append({\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n })\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n history = []\n while True:\n try:\n query = input(\"\\033[36ms11 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n if query.strip() == \"/team\":\n print(TEAM.list_all())\n continue\n if query.strip() == \"/inbox\":\n print(json.dumps(BUS.read_inbox(\"lead\"), indent=2))\n continue\n if query.strip() == \"/tasks\":\n TASKS_DIR.mkdir(exist_ok=True)\n for f in sorted(TASKS_DIR.glob(\"task_*.json\")):\n t = json.loads(f.read_text())\n marker = {\"pending\": \"[ ]\", \"in_progress\": \"[>]\", \"completed\": \"[x]\"}.get(t[\"status\"], \"[?]\")\n owner = f\" 
@{t['owner']}\" if t.get(\"owner\") else \"\"\n print(f\" {marker} #{t['id']}: {t['subject']}{owner}\")\n continue\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
},
{
"id": "s12",
"filename": "s12_worktree_task_isolation.py",
"title": "Worktree + Task Isolation",
"subtitle": "Isolate by Directory",
"loc": 689,
"tools": [
"bash",
"read_file",
"write_file",
"edit_file",
"task_create",
"task_list",
"task_get",
"task_update",
"task_bind_worktree",
"worktree_create",
"worktree_list",
"worktree_status",
"worktree_run",
"worktree_remove",
"worktree_keep",
"worktree_events"
],
"newTools": [
"task_create",
"task_list",
"task_get",
"task_update",
"task_bind_worktree",
"worktree_create",
"worktree_list",
"worktree_status",
"worktree_run",
"worktree_remove",
"worktree_keep",
"worktree_events"
],
"coreAddition": "Composable worktree lifecycle + event stream over a shared task board",
"keyInsight": "Each task works in its own directory; tasks manage goals, worktrees manage directories, bound by ID",
"classes": [
{
"name": "EventBus",
"startLine": 82,
"endLine": 120
},
{
"name": "TaskManager",
"startLine": 121,
"endLine": 218
},
{
"name": "WorktreeManager",
"startLine": 224,
"endLine": 472
}
],
"functions": [
{
"name": "detect_repo_root",
"signature": "def detect_repo_root(cwd: Path)",
"startLine": 52
},
{
"name": "safe_path",
"signature": "def safe_path(p: str)",
"startLine": 477
},
{
"name": "run_bash",
"signature": "def run_bash(command: str)",
"startLine": 484
},
{
"name": "run_read",
"signature": "def run_read(path: str, limit: int = None)",
"startLine": 503
},
{
"name": "run_write",
"signature": "def run_write(path: str, content: str)",
"startLine": 513
},
{
"name": "run_edit",
"signature": "def run_edit(path: str, old_text: str, new_text: str)",
"startLine": 523
},
{
"name": "agent_loop",
"signature": "def agent_loop(messages: list)",
"startLine": 728
}
],
"layer": "collaboration",
"source": "#!/usr/bin/env python3\n\"\"\"\ns12_worktree_task_isolation.py - Worktree + Task Isolation\n\nDirectory-level isolation for parallel task execution.\nTasks are the control plane and worktrees are the execution plane.\n\n .tasks/task_12.json\n {\n \"id\": 12,\n \"subject\": \"Implement auth refactor\",\n \"status\": \"in_progress\",\n \"worktree\": \"auth-refactor\"\n }\n\n .worktrees/index.json\n {\n \"worktrees\": [\n {\n \"name\": \"auth-refactor\",\n \"path\": \".../.worktrees/auth-refactor\",\n \"branch\": \"wt/auth-refactor\",\n \"task_id\": 12,\n \"status\": \"active\"\n }\n ]\n }\n\nKey insight: \"Isolate by directory, coordinate by task ID.\"\n\"\"\"\n\nimport json\nimport os\nimport re\nimport subprocess\nimport time\nfrom pathlib import Path\n\nfrom anthropic import Anthropic\nfrom dotenv import load_dotenv\n\nload_dotenv(override=True)\n\nif os.getenv(\"ANTHROPIC_BASE_URL\"):\n os.environ.pop(\"ANTHROPIC_AUTH_TOKEN\", None)\n\nWORKDIR = Path.cwd()\nclient = Anthropic(base_url=os.getenv(\"ANTHROPIC_BASE_URL\"))\nMODEL = os.environ[\"MODEL_ID\"]\n\n\ndef detect_repo_root(cwd: Path) -> Path | None:\n \"\"\"Return git repo root if cwd is inside a repo, else None.\"\"\"\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--show-toplevel\"],\n cwd=cwd,\n capture_output=True,\n text=True,\n timeout=10,\n )\n if r.returncode != 0:\n return None\n root = Path(r.stdout.strip())\n return root if root.exists() else None\n except Exception:\n return None\n\n\nREPO_ROOT = detect_repo_root(WORKDIR) or WORKDIR\n\nSYSTEM = (\n f\"You are a coding agent at {WORKDIR}. \"\n \"Use task + worktree tools for multi-task work. \"\n \"For parallel or risky changes: create tasks, allocate worktree lanes, \"\n \"run commands in those lanes, then choose keep/remove for closeout. 
\"\n \"Use worktree_events when you need lifecycle visibility.\"\n)\n\n\n# -- EventBus: append-only lifecycle events for observability --\nclass EventBus:\n def __init__(self, event_log_path: Path):\n self.path = event_log_path\n self.path.parent.mkdir(parents=True, exist_ok=True)\n if not self.path.exists():\n self.path.write_text(\"\")\n\n def emit(\n self,\n event: str,\n task: dict | None = None,\n worktree: dict | None = None,\n error: str | None = None,\n ):\n payload = {\n \"event\": event,\n \"ts\": time.time(),\n \"task\": task or {},\n \"worktree\": worktree or {},\n }\n if error:\n payload[\"error\"] = error\n with self.path.open(\"a\", encoding=\"utf-8\") as f:\n f.write(json.dumps(payload) + \"\\n\")\n\n def list_recent(self, limit: int = 20) -> str:\n n = max(1, min(int(limit or 20), 200))\n lines = self.path.read_text(encoding=\"utf-8\").splitlines()\n recent = lines[-n:]\n items = []\n for line in recent:\n try:\n items.append(json.loads(line))\n except Exception:\n items.append({\"event\": \"parse_error\", \"raw\": line})\n return json.dumps(items, indent=2)\n\n\n# -- TaskManager: persistent task board with optional worktree binding --\nclass TaskManager:\n def __init__(self, tasks_dir: Path):\n self.dir = tasks_dir\n self.dir.mkdir(parents=True, exist_ok=True)\n self._next_id = self._max_id() + 1\n\n def _max_id(self) -> int:\n ids = []\n for f in self.dir.glob(\"task_*.json\"):\n try:\n ids.append(int(f.stem.split(\"_\")[1]))\n except Exception:\n pass\n return max(ids) if ids else 0\n\n def _path(self, task_id: int) -> Path:\n return self.dir / f\"task_{task_id}.json\"\n\n def _load(self, task_id: int) -> dict:\n path = self._path(task_id)\n if not path.exists():\n raise ValueError(f\"Task {task_id} not found\")\n return json.loads(path.read_text())\n\n def _save(self, task: dict):\n self._path(task[\"id\"]).write_text(json.dumps(task, indent=2))\n\n def create(self, subject: str, description: str = \"\") -> str:\n task = {\n \"id\": 
self._next_id,\n \"subject\": subject,\n \"description\": description,\n \"status\": \"pending\",\n \"owner\": \"\",\n \"worktree\": \"\",\n \"blockedBy\": [],\n \"created_at\": time.time(),\n \"updated_at\": time.time(),\n }\n self._save(task)\n self._next_id += 1\n return json.dumps(task, indent=2)\n\n def get(self, task_id: int) -> str:\n return json.dumps(self._load(task_id), indent=2)\n\n def exists(self, task_id: int) -> bool:\n return self._path(task_id).exists()\n\n def update(self, task_id: int, status: str = None, owner: str = None) -> str:\n task = self._load(task_id)\n if status:\n if status not in (\"pending\", \"in_progress\", \"completed\"):\n raise ValueError(f\"Invalid status: {status}\")\n task[\"status\"] = status\n if owner is not None:\n task[\"owner\"] = owner\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def bind_worktree(self, task_id: int, worktree: str, owner: str = \"\") -> str:\n task = self._load(task_id)\n task[\"worktree\"] = worktree\n if owner:\n task[\"owner\"] = owner\n if task[\"status\"] == \"pending\":\n task[\"status\"] = \"in_progress\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def unbind_worktree(self, task_id: int) -> str:\n task = self._load(task_id)\n task[\"worktree\"] = \"\"\n task[\"updated_at\"] = time.time()\n self._save(task)\n return json.dumps(task, indent=2)\n\n def list_all(self) -> str:\n tasks = []\n for f in sorted(self.dir.glob(\"task_*.json\")):\n tasks.append(json.loads(f.read_text()))\n if not tasks:\n return \"No tasks.\"\n lines = []\n for t in tasks:\n marker = {\n \"pending\": \"[ ]\",\n \"in_progress\": \"[>]\",\n \"completed\": \"[x]\",\n }.get(t[\"status\"], \"[?]\")\n owner = f\" owner={t['owner']}\" if t.get(\"owner\") else \"\"\n wt = f\" wt={t['worktree']}\" if t.get(\"worktree\") else \"\"\n lines.append(f\"{marker} #{t['id']}: {t['subject']}{owner}{wt}\")\n return 
\"\\n\".join(lines)\n\n\nTASKS = TaskManager(REPO_ROOT / \".tasks\")\nEVENTS = EventBus(REPO_ROOT / \".worktrees\" / \"events.jsonl\")\n\n\n# -- WorktreeManager: create/list/run/remove git worktrees + lifecycle index --\nclass WorktreeManager:\n def __init__(self, repo_root: Path, tasks: TaskManager, events: EventBus):\n self.repo_root = repo_root\n self.tasks = tasks\n self.events = events\n self.dir = repo_root / \".worktrees\"\n self.dir.mkdir(parents=True, exist_ok=True)\n self.index_path = self.dir / \"index.json\"\n if not self.index_path.exists():\n self.index_path.write_text(json.dumps({\"worktrees\": []}, indent=2))\n self.git_available = self._is_git_repo()\n\n def _is_git_repo(self) -> bool:\n try:\n r = subprocess.run(\n [\"git\", \"rev-parse\", \"--is-inside-work-tree\"],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=10,\n )\n return r.returncode == 0\n except Exception:\n return False\n\n def _run_git(self, args: list[str]) -> str:\n if not self.git_available:\n raise RuntimeError(\"Not in a git repository. worktree tools require git.\")\n r = subprocess.run(\n [\"git\", *args],\n cwd=self.repo_root,\n capture_output=True,\n text=True,\n timeout=120,\n )\n if r.returncode != 0:\n msg = (r.stdout + r.stderr).strip()\n raise RuntimeError(msg or f\"git {' '.join(args)} failed\")\n return (r.stdout + r.stderr).strip() or \"(no output)\"\n\n def _load_index(self) -> dict:\n return json.loads(self.index_path.read_text())\n\n def _save_index(self, data: dict):\n self.index_path.write_text(json.dumps(data, indent=2))\n\n def _find(self, name: str) -> dict | None:\n idx = self._load_index()\n for wt in idx.get(\"worktrees\", []):\n if wt.get(\"name\") == name:\n return wt\n return None\n\n def _validate_name(self, name: str):\n if not re.fullmatch(r\"[A-Za-z0-9._-]{1,40}\", name or \"\"):\n raise ValueError(\n \"Invalid worktree name. 
Use 1-40 chars: letters, numbers, ., _, -\"\n )\n\n def create(self, name: str, task_id: int = None, base_ref: str = \"HEAD\") -> str:\n self._validate_name(name)\n if self._find(name):\n raise ValueError(f\"Worktree '{name}' already exists in index\")\n if task_id is not None and not self.tasks.exists(task_id):\n raise ValueError(f\"Task {task_id} not found\")\n\n path = self.dir / name\n branch = f\"wt/{name}\"\n self.events.emit(\n \"worktree.create.before\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n )\n try:\n self._run_git([\"worktree\", \"add\", \"-b\", branch, str(path), base_ref])\n\n entry = {\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"task_id\": task_id,\n \"status\": \"active\",\n \"created_at\": time.time(),\n }\n\n idx = self._load_index()\n idx[\"worktrees\"].append(entry)\n self._save_index(idx)\n\n if task_id is not None:\n self.tasks.bind_worktree(task_id, name)\n\n self.events.emit(\n \"worktree.create.after\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\n \"name\": name,\n \"path\": str(path),\n \"branch\": branch,\n \"status\": \"active\",\n },\n )\n return json.dumps(entry, indent=2)\n except Exception as e:\n self.events.emit(\n \"worktree.create.failed\",\n task={\"id\": task_id} if task_id is not None else {},\n worktree={\"name\": name, \"base_ref\": base_ref},\n error=str(e),\n )\n raise\n\n def list_all(self) -> str:\n idx = self._load_index()\n wts = idx.get(\"worktrees\", [])\n if not wts:\n return \"No worktrees in index.\"\n lines = []\n for wt in wts:\n suffix = f\" task={wt['task_id']}\" if wt.get(\"task_id\") else \"\"\n lines.append(\n f\"[{wt.get('status', 'unknown')}] {wt['name']} -> \"\n f\"{wt['path']} ({wt.get('branch', '-')}){suffix}\"\n )\n return \"\\n\".join(lines)\n\n def status(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = 
Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n r = subprocess.run(\n [\"git\", \"status\", \"--short\", \"--branch\"],\n cwd=path,\n capture_output=True,\n text=True,\n timeout=60,\n )\n text = (r.stdout + r.stderr).strip()\n return text or \"Clean worktree\"\n\n def run(self, name: str, command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n path = Path(wt[\"path\"])\n if not path.exists():\n return f\"Error: Worktree path missing: {path}\"\n\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=path,\n capture_output=True,\n text=True,\n timeout=300,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except subprocess.TimeoutExpired:\n return \"Error: Timeout (300s)\"\n\n def remove(self, name: str, force: bool = False, complete_task: bool = False) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n self.events.emit(\n \"worktree.remove.before\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n )\n try:\n args = [\"worktree\", \"remove\"]\n if force:\n args.append(\"--force\")\n args.append(wt[\"path\"])\n self._run_git(args)\n\n if complete_task and wt.get(\"task_id\") is not None:\n task_id = wt[\"task_id\"]\n before = json.loads(self.tasks.get(task_id))\n self.tasks.update(task_id, status=\"completed\")\n self.tasks.unbind_worktree(task_id)\n self.events.emit(\n \"task.completed\",\n task={\n \"id\": task_id,\n \"subject\": before.get(\"subject\", \"\"),\n \"status\": \"completed\",\n },\n worktree={\"name\": name},\n )\n\n idx = self._load_index()\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == 
name:\n item[\"status\"] = \"removed\"\n item[\"removed_at\"] = time.time()\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.remove.after\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\"), \"status\": \"removed\"},\n )\n return f\"Removed worktree '{name}'\"\n except Exception as e:\n self.events.emit(\n \"worktree.remove.failed\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\"name\": name, \"path\": wt.get(\"path\")},\n error=str(e),\n )\n raise\n\n def keep(self, name: str) -> str:\n wt = self._find(name)\n if not wt:\n return f\"Error: Unknown worktree '{name}'\"\n\n idx = self._load_index()\n kept = None\n for item in idx.get(\"worktrees\", []):\n if item.get(\"name\") == name:\n item[\"status\"] = \"kept\"\n item[\"kept_at\"] = time.time()\n kept = item\n self._save_index(idx)\n\n self.events.emit(\n \"worktree.keep\",\n task={\"id\": wt.get(\"task_id\")} if wt.get(\"task_id\") is not None else {},\n worktree={\n \"name\": name,\n \"path\": wt.get(\"path\"),\n \"status\": \"kept\",\n },\n )\n return json.dumps(kept, indent=2) if kept else f\"Error: Unknown worktree '{name}'\"\n\n\nWORKTREES = WorktreeManager(REPO_ROOT, TASKS, EVENTS)\n\n\n# -- Base tools (kept minimal, same style as previous sessions) --\ndef safe_path(p: str) -> Path:\n path = (WORKDIR / p).resolve()\n if not path.is_relative_to(WORKDIR):\n raise ValueError(f\"Path escapes workspace: {p}\")\n return path\n\n\ndef run_bash(command: str) -> str:\n dangerous = [\"rm -rf /\", \"sudo\", \"shutdown\", \"reboot\", \"> /dev/\"]\n if any(d in command for d in dangerous):\n return \"Error: Dangerous command blocked\"\n try:\n r = subprocess.run(\n command,\n shell=True,\n cwd=WORKDIR,\n capture_output=True,\n text=True,\n timeout=120,\n )\n out = (r.stdout + r.stderr).strip()\n return out[:50000] if out else \"(no output)\"\n except 
subprocess.TimeoutExpired:\n return \"Error: Timeout (120s)\"\n\n\ndef run_read(path: str, limit: int = None) -> str:\n try:\n lines = safe_path(path).read_text().splitlines()\n if limit and limit < len(lines):\n lines = lines[:limit] + [f\"... ({len(lines) - limit} more)\"]\n return \"\\n\".join(lines)[:50000]\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_write(path: str, content: str) -> str:\n try:\n fp = safe_path(path)\n fp.parent.mkdir(parents=True, exist_ok=True)\n fp.write_text(content)\n return f\"Wrote {len(content)} bytes\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\ndef run_edit(path: str, old_text: str, new_text: str) -> str:\n try:\n fp = safe_path(path)\n c = fp.read_text()\n if old_text not in c:\n return f\"Error: Text not found in {path}\"\n fp.write_text(c.replace(old_text, new_text, 1))\n return f\"Edited {path}\"\n except Exception as e:\n return f\"Error: {e}\"\n\n\nTOOL_HANDLERS = {\n \"bash\": lambda **kw: run_bash(kw[\"command\"]),\n \"read_file\": lambda **kw: run_read(kw[\"path\"], kw.get(\"limit\")),\n \"write_file\": lambda **kw: run_write(kw[\"path\"], kw[\"content\"]),\n \"edit_file\": lambda **kw: run_edit(kw[\"path\"], kw[\"old_text\"], kw[\"new_text\"]),\n \"task_create\": lambda **kw: TASKS.create(kw[\"subject\"], kw.get(\"description\", \"\")),\n \"task_list\": lambda **kw: TASKS.list_all(),\n \"task_get\": lambda **kw: TASKS.get(kw[\"task_id\"]),\n \"task_update\": lambda **kw: TASKS.update(kw[\"task_id\"], kw.get(\"status\"), kw.get(\"owner\")),\n \"task_bind_worktree\": lambda **kw: TASKS.bind_worktree(kw[\"task_id\"], kw[\"worktree\"], kw.get(\"owner\", \"\")),\n \"worktree_create\": lambda **kw: WORKTREES.create(kw[\"name\"], kw.get(\"task_id\"), kw.get(\"base_ref\", \"HEAD\")),\n \"worktree_list\": lambda **kw: WORKTREES.list_all(),\n \"worktree_status\": lambda **kw: WORKTREES.status(kw[\"name\"]),\n \"worktree_run\": lambda **kw: WORKTREES.run(kw[\"name\"], kw[\"command\"]),\n 
\"worktree_keep\": lambda **kw: WORKTREES.keep(kw[\"name\"]),\n \"worktree_remove\": lambda **kw: WORKTREES.remove(kw[\"name\"], kw.get(\"force\", False), kw.get(\"complete_task\", False)),\n \"worktree_events\": lambda **kw: EVENTS.list_recent(kw.get(\"limit\", 20)),\n}\n\nTOOLS = [\n {\n \"name\": \"bash\",\n \"description\": \"Run a shell command in the current workspace (blocking).\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"command\": {\"type\": \"string\"}},\n \"required\": [\"command\"],\n },\n },\n {\n \"name\": \"read_file\",\n \"description\": \"Read file contents.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"limit\": {\"type\": \"integer\"},\n },\n \"required\": [\"path\"],\n },\n },\n {\n \"name\": \"write_file\",\n \"description\": \"Write content to file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"content\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"content\"],\n },\n },\n {\n \"name\": \"edit_file\",\n \"description\": \"Replace exact text in file.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"path\": {\"type\": \"string\"},\n \"old_text\": {\"type\": \"string\"},\n \"new_text\": {\"type\": \"string\"},\n },\n \"required\": [\"path\", \"old_text\", \"new_text\"],\n },\n },\n {\n \"name\": \"task_create\",\n \"description\": \"Create a new task on the shared task board.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"subject\": {\"type\": \"string\"},\n \"description\": {\"type\": \"string\"},\n },\n \"required\": [\"subject\"],\n },\n },\n {\n \"name\": \"task_list\",\n \"description\": \"List all tasks with status, owner, and worktree binding.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"task_get\",\n \"description\": \"Get task details by ID.\",\n \"input_schema\": {\n \"type\": 
\"object\",\n \"properties\": {\"task_id\": {\"type\": \"integer\"}},\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_update\",\n \"description\": \"Update task status or owner.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"status\": {\n \"type\": \"string\",\n \"enum\": [\"pending\", \"in_progress\", \"completed\"],\n },\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\"],\n },\n },\n {\n \"name\": \"task_bind_worktree\",\n \"description\": \"Bind a task to a worktree name.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"task_id\": {\"type\": \"integer\"},\n \"worktree\": {\"type\": \"string\"},\n \"owner\": {\"type\": \"string\"},\n },\n \"required\": [\"task_id\", \"worktree\"],\n },\n },\n {\n \"name\": \"worktree_create\",\n \"description\": \"Create a git worktree and optionally bind it to a task.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"task_id\": {\"type\": \"integer\"},\n \"base_ref\": {\"type\": \"string\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_list\",\n \"description\": \"List worktrees tracked in .worktrees/index.json.\",\n \"input_schema\": {\"type\": \"object\", \"properties\": {}},\n },\n {\n \"name\": \"worktree_status\",\n \"description\": \"Show git status for one worktree.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_run\",\n \"description\": \"Run a shell command in a named worktree directory.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"command\": {\"type\": \"string\"},\n },\n \"required\": [\"name\", \"command\"],\n },\n },\n {\n \"name\": \"worktree_remove\",\n \"description\": \"Remove a worktree and optionally mark its bound task 
completed.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": {\"type\": \"string\"},\n \"force\": {\"type\": \"boolean\"},\n \"complete_task\": {\"type\": \"boolean\"},\n },\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_keep\",\n \"description\": \"Mark a worktree as kept in lifecycle state without removing it.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"name\": {\"type\": \"string\"}},\n \"required\": [\"name\"],\n },\n },\n {\n \"name\": \"worktree_events\",\n \"description\": \"List recent worktree/task lifecycle events from .worktrees/events.jsonl.\",\n \"input_schema\": {\n \"type\": \"object\",\n \"properties\": {\"limit\": {\"type\": \"integer\"}},\n },\n },\n]\n\n\ndef agent_loop(messages: list):\n while True:\n response = client.messages.create(\n model=MODEL,\n system=SYSTEM,\n messages=messages,\n tools=TOOLS,\n max_tokens=8000,\n )\n messages.append({\"role\": \"assistant\", \"content\": response.content})\n if response.stop_reason != \"tool_use\":\n return\n\n results = []\n for block in response.content:\n if block.type == \"tool_use\":\n handler = TOOL_HANDLERS.get(block.name)\n try:\n output = handler(**block.input) if handler else f\"Unknown tool: {block.name}\"\n except Exception as e:\n output = f\"Error: {e}\"\n print(f\"> {block.name}: {str(output)[:200]}\")\n results.append(\n {\n \"type\": \"tool_result\",\n \"tool_use_id\": block.id,\n \"content\": str(output),\n }\n )\n messages.append({\"role\": \"user\", \"content\": results})\n\n\nif __name__ == \"__main__\":\n print(f\"Repo root for s12: {REPO_ROOT}\")\n if not WORKTREES.git_available:\n print(\"Note: Not in a git repo. 
worktree_* tools will return errors.\")\n\n history = []\n while True:\n try:\n query = input(\"\\033[36ms12 >> \\033[0m\")\n except (EOFError, KeyboardInterrupt):\n break\n if query.strip().lower() in (\"q\", \"exit\", \"\"):\n break\n history.append({\"role\": \"user\", \"content\": query})\n agent_loop(history)\n print()\n"
}
],
"diffs": [
{
"from": "s01",
"to": "s02",
"newClasses": [],
"newFunctions": [
"safe_path",
"run_read",
"run_write",
"run_edit"
],
"newTools": [
"read_file",
"write_file",
"edit_file"
],
"locDelta": 31
},
{
"from": "s02",
"to": "s03",
"newClasses": [
"TodoManager"
],
"newFunctions": [],
"newTools": [
"todo"
],
"locDelta": 56
},
{
"from": "s03",
"to": "s04",
"newClasses": [],
"newFunctions": [
"run_subagent"
],
"newTools": [
"task"
],
"locDelta": -25
},
{
"from": "s04",
"to": "s05",
"newClasses": [
"SkillLoader"
],
"newFunctions": [],
"newTools": [
"load_skill"
],
"locDelta": 36
},
{
"from": "s05",
"to": "s06",
"newClasses": [],
"newFunctions": [
"estimate_tokens",
"micro_compact",
"auto_compact"
],
"newTools": [
"compact"
],
"locDelta": 18
},
{
"from": "s06",
"to": "s07",
"newClasses": [
"TaskManager"
],
"newFunctions": [],
"newTools": [
"task_create",
"task_update",
"task_list",
"task_get"
],
"locDelta": 2
},
{
"from": "s07",
"to": "s08",
"newClasses": [
"BackgroundManager"
],
"newFunctions": [],
"newTools": [
"background_run",
"check_background"
],
"locDelta": -9
},
{
"from": "s08",
"to": "s09",
"newClasses": [
"MessageBus",
"TeammateManager"
],
"newFunctions": [
"_safe_path",
"_run_bash",
"_run_read",
"_run_write",
"_run_edit"
],
"newTools": [
"alice",
"send_message",
"read_inbox",
"spawn_teammate",
"list_teammates",
"broadcast"
],
"locDelta": 150
},
{
"from": "s09",
"to": "s10",
"newClasses": [],
"newFunctions": [
"handle_shutdown_request",
"handle_plan_review",
"_check_shutdown_status"
],
"newTools": [
"shutdown_response",
"plan_approval",
"shutdown_request"
],
"locDelta": 71
},
{
"from": "s10",
"to": "s11",
"newClasses": [],
"newFunctions": [
"scan_unclaimed_tasks",
"claim_task",
"make_identity_block"
],
"newTools": [
"idle",
"claim_task"
],
"locDelta": 80
},
{
"from": "s11",
"to": "s12",
"newClasses": [
"EventBus",
"TaskManager",
"WorktreeManager"
],
"newFunctions": [
"detect_repo_root",
"safe_path",
"run_bash",
"run_read",
"run_write",
"run_edit"
],
"newTools": [
"task_create",
"task_list",
"task_get",
"task_update",
"task_bind_worktree",
"worktree_create",
"worktree_list",
"worktree_status",
"worktree_run",
"worktree_remove",
"worktree_keep",
"worktree_events"
],
"locDelta": 195
}
]
}
================================================
FILE: web/src/data/scenarios/s01.json
================================================
{
"version": "s01",
"title": "The Agent Loop",
"description": "A minimal agent that uses only bash to accomplish tasks",
"steps": [
{
"type": "user_message",
"content": "Create a file called hello.py that prints 'Hello, World!'",
"annotation": "User sends a task to the agent"
},
{
"type": "assistant_text",
"content": "I'll create that file for you using bash.",
"annotation": "The model decides to use its only tool: bash"
},
{
"type": "tool_call",
"content": "echo 'print(\"Hello, World!\")' > hello.py",
"toolName": "bash",
"annotation": "Tool call: the model generates a bash command"
},
{
"type": "tool_result",
"content": "",
"toolName": "bash",
"annotation": "Bash returns empty output (success)"
},
{
"type": "assistant_text",
"content": "Let me verify the file was created correctly.",
"annotation": "The loop continues: model sees the result and decides next action"
},
{
"type": "tool_call",
"content": "cat hello.py",
"toolName": "bash",
"annotation": "Another bash call to verify"
},
{
"type": "tool_result",
"content": "print(\"Hello, World!\")",
"toolName": "bash",
"annotation": "File contents confirmed"
},
{
"type": "assistant_text",
"content": "Done! I've created hello.py with the print statement. You can run it with `python hello.py`.",
"annotation": "stop_reason != 'tool_use' -> loop breaks, agent responds to user"
}
]
}
================================================
FILE: web/src/data/scenarios/s02.json
================================================
{
"version": "s02",
"title": "Tools",
"description": "Agent with read, write, edit, and bash tools",
"steps": [
{
"type": "user_message",
"content": "Add a function called greet(name) to hello.py",
"annotation": "User requests a code modification"
},
{
"type": "assistant_text",
"content": "Let me read the current file first.",
"annotation": "Model chooses read_file instead of bash cat"
},
{
"type": "tool_call",
"content": "hello.py",
"toolName": "read_file",
"annotation": "Dedicated read tool provides cleaner file access"
},
{
"type": "tool_result",
"content": "print(\"Hello, World!\")",
"toolName": "read_file",
"annotation": "File contents returned to the model"
},
{
"type": "tool_call",
"content": "hello.py\n---\ndef greet(name):\n print(f\"Hello, {name}!\")\n\ngreet(\"World\")",
"toolName": "write_file",
"annotation": "write_file replaces the entire file content"
},
{
"type": "tool_result",
"content": "File written successfully",
"toolName": "write_file",
"annotation": "Write confirmed"
},
{
"type": "assistant_text",
"content": "I've updated hello.py with a greet(name) function that takes a name parameter.",
"annotation": "4 tools beat 1: each tool has a clear, focused purpose"
}
]
}
================================================
FILE: web/src/data/scenarios/s03.json
================================================
{
"version": "s03",
"title": "TodoWrite",
"description": "Agent creates a visible plan before executing",
"steps": [
{
"type": "user_message",
"content": "Build a CLI calculator that supports add, subtract, multiply, divide",
"annotation": "A multi-step task that benefits from planning"
},
{
"type": "tool_call",
"content": "[\n { \"id\": 1, \"task\": \"Create calc.py with argument parsing\", \"status\": \"in_progress\" },\n { \"id\": 2, \"task\": \"Implement 4 operations\", \"status\": \"pending\" },\n { \"id\": 3, \"task\": \"Add error handling\", \"status\": \"pending\" },\n { \"id\": 4, \"task\": \"Test all operations\", \"status\": \"pending\" }\n]",
"toolName": "todo_write",
"annotation": "TodoManager creates a visible plan BEFORE any code is written"
},
{
"type": "tool_result",
"content": "Todo list created with 4 tasks",
"toolName": "todo_write",
"annotation": "The plan is now visible and trackable"
},
{
"type": "tool_call",
"content": "calc.py\n---\nimport sys\n\ndef calculate(op, a, b):\n ops = {'+': a+b, '-': a-b, '*': a*b, '/': a/b}\n return ops.get(op, 'Unknown operator')\n\nif __name__ == '__main__':\n a, op, b = float(sys.argv[1]), sys.argv[2], float(sys.argv[3])\n print(calculate(op, a, b))",
"toolName": "write_file",
"annotation": "Executing task #1 and #2: create file with operations"
},
{
"type": "tool_result",
"content": "File written successfully",
"toolName": "write_file",
"annotation": "Code written"
},
{
"type": "tool_call",
"content": "python calc.py 10 + 5",
"toolName": "bash",
"annotation": "Executing task #4: testing"
},
{
"type": "tool_result",
"content": "15.0",
"toolName": "bash",
"annotation": "Test passes"
},
{
"type": "assistant_text",
"content": "Calculator built and tested. All 4 operations work correctly.",
"annotation": "Visible plans improve task completion by making progress trackable"
}
]
}
================================================
FILE: web/src/data/scenarios/s04.json
================================================
{
"version": "s04",
"title": "Subagents",
"description": "Agent spawns a subagent for isolated subtask execution",
"steps": [
{
"type": "user_message",
"content": "Refactor the calculator: split into modules and add tests",
"annotation": "Complex task suitable for delegation"
},
{
"type": "assistant_text",
"content": "I'll delegate the test writing to a subagent while I handle the refactoring.",
"annotation": "Agent decides to split work using the Task tool"
},
{
"type": "tool_call",
"content": "Write unit tests for calc.py covering all 4 operations and edge cases like division by zero",
"toolName": "task",
"annotation": "Task tool spawns a subagent with its own isolated context"
},
{
"type": "system_event",
"content": "[Subagent spawned] Context: fresh messages[], same tools, scoped prompt",
"annotation": "Subagent gets clean context -- no confusion from parent's conversation"
},
{
"type": "tool_result",
"content": "Created test_calc.py with 8 test cases. All passing.",
"toolName": "task",
"annotation": "Subagent completes and returns a summary to parent"
},
{
"type": "tool_call",
"content": "calc.py -> calc/operations.py, calc/cli.py",
"toolName": "write_file",
"annotation": "Parent continues with its own task: refactoring"
},
{
"type": "tool_result",
"content": "Files written successfully",
"toolName": "write_file",
"annotation": "Modules split"
},
{
"type": "assistant_text",
"content": "Refactored into calc/operations.py and calc/cli.py. Tests were written by a subagent and are all passing.",
"annotation": "Context isolation prevents confusion between tasks"
}
]
}
================================================
FILE: web/src/data/scenarios/s05.json
================================================
{
"version": "s05",
"title": "Skills",
"description": "Agent loads external knowledge from SKILL.md files",
"steps": [
{
"type": "user_message",
"content": "Set up a new Next.js project with TypeScript",
"annotation": "Task requires framework-specific knowledge"
},
{
"type": "system_event",
"content": "[SkillLoader] Scanning for SKILL.md files... Found: nextjs.SKILL.md",
"annotation": "SkillLoader finds a matching skill file before the loop runs"
},
{
"type": "system_event",
"content": "[Skill injected] nextjs.SKILL.md -> tool_result message appended",
"annotation": "Skill content is injected as a tool_result, not bloating the system prompt"
},
{
"type": "assistant_text",
"content": "I have knowledge about Next.js setup. Let me create the project structure.",
"annotation": "Model now has framework-specific knowledge from the skill file"
},
{
"type": "tool_call",
"content": "npx create-next-app@latest my-app --typescript --tailwind --eslint --app --src-dir",
"toolName": "bash",
"annotation": "Using skill-provided knowledge for correct flags"
},
{
"type": "tool_result",
"content": "Success! Created my-app at /workspace/my-app",
"toolName": "bash",
"annotation": "Project created with correct configuration"
},
{
"type": "assistant_text",
"content": "Next.js project created with TypeScript, Tailwind CSS, ESLint, App Router, and src/ directory.",
"annotation": "Skills inject via tool_result, not system prompt -- keeping context lean"
}
]
}
================================================
FILE: web/src/data/scenarios/s06.json
================================================
{
"version": "s06",
"title": "Compact",
"description": "Agent compresses context when tokens exceed threshold",
"steps": [
{
"type": "user_message",
"content": "Analyze all Python files in this project and summarize each one",
"annotation": "A task that will generate large amounts of context"
},
{
"type": "tool_call",
"content": "find . -name '*.py' -type f",
"toolName": "bash",
"annotation": "Finding all files to analyze"
},
{
"type": "tool_result",
"content": "./src/main.py\n./src/utils.py\n./src/models.py\n./tests/test_main.py\n... (28 more files)",
"toolName": "bash",
"annotation": "32 files found -- reading all of them will generate massive context"
},
{
"type": "assistant_text",
"content": "I'll read and analyze each file systematically.",
"annotation": "Agent begins processing files one by one"
},
{
"type": "tool_call",
"content": "./src/main.py",
"toolName": "read_file",
"annotation": "Reading file 1 of 32..."
},
{
"type": "tool_result",
"content": "[... 450 lines of code ...]",
"toolName": "read_file",
"annotation": "Large file content added to messages[]"
},
{
"type": "system_event",
"content": "[ContextManager] Token count: 148,231 / 150,000 threshold\nTriggering 3-layer compression:\n 1. Drop tool_result content > 1000 chars -> replaced with summary\n 2. Merge sequential assistant turns\n 3. Summarize oldest 50% of conversation\nResult: 148,231 -> 42,087 tokens",
"annotation": "Compression fires automatically when approaching the limit"
},
{
"type": "assistant_text",
"content": "Continuing analysis with compressed context. Files 1-18 have been summarized, proceeding with remaining files.",
"annotation": "Forgetting old tool results enables infinite work -- the agent can process any number of files"
}
]
}
================================================
FILE: web/src/data/scenarios/s07.json
================================================
{
"version": "s07",
"title": "Tasks",
"description": "Agent uses persistent file-based tasks with dependencies",
"steps": [
{
"type": "user_message",
"content": "Build a REST API with user auth, database, and deployment config",
"annotation": "Multi-part task requiring dependency tracking"
},
{
"type": "tool_call",
"content": "create_task(\"Set up database schema\", deps=[])\ncreate_task(\"Implement user model\", deps=[1])\ncreate_task(\"Add auth endpoints\", deps=[2])\ncreate_task(\"Write deployment config\", deps=[3])",
"toolName": "task_manager",
"annotation": "TaskManager creates file-based tasks with explicit dependencies"
},
{
"type": "tool_result",
"content": "Created 4 tasks in .tasks/\n #1 Set up database schema [ready]\n #2 Implement user model [blocked by #1]\n #3 Add auth endpoints [blocked by #2]\n #4 Write deployment config [blocked by #3]",
"toolName": "task_manager",
"annotation": "Tasks persisted to .tasks/ directory as JSON files"
},
{
"type": "tool_call",
"content": "schema.sql\n---\nCREATE TABLE users (id SERIAL PRIMARY KEY, email TEXT UNIQUE, password_hash TEXT);",
"toolName": "write_file",
"annotation": "Working on task #1"
},
{
"type": "tool_result",
"content": "File written successfully",
"toolName": "write_file",
"annotation": "Schema created"
},
{
"type": "tool_call",
"content": "complete_task(1)",
"toolName": "task_manager",
"annotation": "Completing task #1 automatically unblocks task #2"
},
{
"type": "tool_result",
"content": "Task #1 completed. Task #2 is now ready.",
"toolName": "task_manager",
"annotation": "File-based persistence outlives process memory"
},
{
"type": "assistant_text",
"content": "Database schema is done. Moving to user model implementation.",
"annotation": "Dependency graph ensures tasks execute in correct order"
}
]
}
================================================
FILE: web/src/data/scenarios/s08.json
================================================
{
"version": "s08",
"title": "Background Tasks",
"description": "Agent executes long-running tasks in background threads",
"steps": [
{
"type": "user_message",
"content": "Run the test suite and while it's running, fix the linting errors",
"annotation": "Two tasks that can run concurrently"
},
{
"type": "assistant_text",
"content": "I'll start the tests in the background and fix linting in parallel.",
"annotation": "Agent recognizes opportunity for concurrent execution"
},
{
"type": "tool_call",
"content": "pytest tests/ -v --timeout=300",
"toolName": "background_task",
"annotation": "BackgroundManager spawns a thread for long-running test suite"
},
{
"type": "system_event",
"content": "[BackgroundManager] Thread spawned: task_001\nRunning: pytest tests/ -v --timeout=300\nStatus: in_progress",
"annotation": "Thread is running independently -- main loop continues"
},
{
"type": "tool_result",
"content": "Background task started: task_001",
"toolName": "background_task",
"annotation": "Control returns immediately to the agent"
},
{
"type": "tool_call",
"content": "ruff check --fix .",
"toolName": "bash",
"annotation": "Agent works on linting while tests run in background"
},
{
"type": "tool_result",
"content": "Fixed 12 linting errors in 5 files",
"toolName": "bash",
"annotation": "Linting fixed concurrently with tests"
},
{
"type": "system_event",
"content": "[Notification] Background task_001 completed\nResult: 47 tests passed, 2 failed\nFailed: test_auth.py::test_token_expiry, test_api.py::test_rate_limit",
"annotation": "Background thread notifies main loop via notification queue"
},
{
"type": "assistant_text",
"content": "Linting is fixed. Tests finished: 47 passed, 2 failed. I'll fix the failing tests now.",
"annotation": "Non-blocking execution via threads + queue enables parallel work"
}
]
}
================================================
FILE: web/src/data/scenarios/s09.json
================================================
{
"version": "s09",
"title": "Agent Teams",
"description": "Persistent teammates communicate via JSONL inboxes",
"steps": [
{
"type": "user_message",
"content": "Build a full-stack app: frontend in React, backend in Python, with tests",
"annotation": "Large task requiring team collaboration"
},
{
"type": "tool_call",
"content": "spawn_teammate(\n name: \"frontend-dev\",\n role: \"React specialist\"\n)\nspawn_teammate(\n name: \"backend-dev\",\n role: \"Python API specialist\"\n)",
"toolName": "teammate_manager",
"annotation": "Unlike subagents (s04) that die after one task, teammates persist"
},
{
"type": "system_event",
"content": "[TeammateManager] Team created:\n .team/config.json written\n frontend-dev [idle] - own thread, own agent loop\n backend-dev [idle] - own thread, own agent loop",
"annotation": "Each teammate runs its own while(tool_use) loop in a separate thread"
},
{
"type": "tool_call",
"content": "send_message(\n to: \"backend-dev\",\n content: \"Build the user API: GET/POST /users\"\n)",
"toolName": "send_message",
"annotation": "Message appended to .team/inbox/backend-dev.jsonl"
},
{
"type": "system_event",
"content": "[backend-dev] Checking inbox... 1 new message\n From: lead\n Content: \"Build the user API: GET/POST /users\"\n (inbox drained after read)",
"annotation": "Inbox is read-then-clear: prevents duplicate processing"
},
{
"type": "system_event",
"content": "[backend-dev -> frontend-dev] Reply via inbox:\n \"API ready: GET /users, POST /users, GET /users/:id\"\n Written to .team/inbox/frontend-dev.jsonl",
"annotation": "Teammates message each other through the same JSONL mechanism"
},
{
"type": "assistant_text",
"content": "Team is communicating via file-based inboxes. Each teammate has its own .jsonl file -- append to send, drain to read.",
"annotation": "JSONL inboxes decouple sender timing from receiver timing"
}
]
}
================================================
FILE: web/src/data/scenarios/s10.json
================================================
{
"version": "s10",
"title": "Team Protocols",
"description": "Request-response protocols for shutdown and plan approval",
"steps": [
{
"type": "user_message",
"content": "Work is done. Shut down the team gracefully.",
"annotation": "Can't just kill threads -- need coordinated shutdown"
},
{
"type": "tool_call",
"content": "shutdown_request(\n teammate: \"backend-dev\",\n request_id: \"req_abc123\"\n)",
"toolName": "shutdown_teammate",
"annotation": "request_id correlates the request with the response"
},
{
"type": "system_event",
"content": "[Shutdown FSM] req_abc123: pending\n Lead -> backend-dev inbox:\n { type: \"shutdown_request\", request_id: \"req_abc123\" }",
"annotation": "FSM tracks state: pending -> approved | rejected"
},
{
"type": "system_event",
"content": "[backend-dev] Received shutdown request req_abc123\n Current task: none (idle)\n Decision: approve\n -> lead inbox: { type: \"shutdown_response\", request_id: \"req_abc123\", approve: true }",
"annotation": "Teammate decides whether to approve based on its own state"
},
{
"type": "system_event",
"content": "[Shutdown FSM] req_abc123: approved\n backend-dev thread stopped\n Status: idle -> shutdown",
"annotation": "Same request_id pattern works for plan approval too"
},
{
"type": "assistant_text",
"content": "backend-dev shut down gracefully. The same request_id + FSM pattern handles plan approval: teammate submits plan, lead approves/rejects with the correlated request_id.",
"annotation": "One pattern (request_id correlation), two applications (shutdown + plan approval)"
}
]
}
================================================
FILE: web/src/data/scenarios/s11.json
================================================
{
"version": "s11",
"title": "Autonomous Agents",
"description": "Teammates self-govern with idle cycles and auto-claiming",
"steps": [
{
"type": "user_message",
"content": "Implement the full feature backlog",
"annotation": "Open-ended task for autonomous team execution"
},
{
"type": "tool_call",
"content": "create_tasks([\n { title: \"User authentication\", status: \"open\" },\n { title: \"Dashboard UI\", status: \"open\" },\n { title: \"API rate limiting\", status: \"open\" },\n { title: \"Integration tests\", status: \"open\" }\n])",
"toolName": "task_manager",
"annotation": "Tasks created with no owner -- available for auto-claiming"
},
{
"type": "tool_result",
"content": "4 tasks created on shared board",
"toolName": "task_manager",
"annotation": "Unassigned tasks visible to all teammates"
},
{
"type": "system_event",
"content": "[frontend-dev] Idle cycle triggered\n 1. Check inbox -> 0 messages\n 2. Poll task board -> found unowned task #2 \"Dashboard UI\"\n 3. Auto-claim task #2\n 4. Status: idle -> working",
"annotation": "Idle cycle: check inbox, poll tasks, auto-claim, resume work"
},
{
"type": "system_event",
"content": "[backend-dev] Idle cycle triggered\n 1. Check inbox -> 0 messages\n 2. Poll task board -> found unowned task #1 \"User authentication\"\n 3. Auto-claim task #1\n 4. Status: idle -> working",
"annotation": "Multiple teammates claim different tasks concurrently"
},
{
"type": "system_event",
"content": "[tester] Idle cycle triggered\n 1. Check inbox -> 0 messages\n 2. Poll task board -> task #4 blocked by #1, #2, #3\n 3. No claimable tasks\n 4. Status: idle (will retry in 30s)",
"annotation": "Timeout-based polling prevents busy-waiting"
},
{
"type": "assistant_text",
"content": "Team is self-organizing: frontend-dev claimed Dashboard UI, backend-dev claimed User auth. Tester is waiting for dependencies to clear.",
"annotation": "Polling + timeout makes teammates autonomous -- no micromanagement needed"
}
]
}
================================================
FILE: web/src/data/scenarios/s12.json
================================================
{
"version": "s12",
"title": "Worktree + Task Isolation",
"description": "Use a shared task board with optional worktree lanes for clean parallel execution",
"steps": [
{
"type": "user_message",
"content": "Implement auth refactor and login UI updates in parallel",
"annotation": "Two active tasks in one workspace would collide"
},
{
"type": "tool_call",
"content": "task_create(subject: \"Auth refactor\")\ntask_create(subject: \"Login UI polish\")",
"toolName": "task_manager",
"annotation": "Shared board remains the coordination source of truth"
},
{
"type": "tool_call",
"content": "worktree_create(name: \"auth-refactor\", task_id: 1)\nworktree_create(name: \"ui-login\")\ntask_bind_worktree(task_id: 2, worktree: \"ui-login\")",
"toolName": "worktree_manager",
"annotation": "Lane allocation and task association are composable; task 2 binds after lane creation"
},
{
"type": "system_event",
"content": "worktree.create.before/after emitted\n.tasks/task_1.json -> { status: \"in_progress\", worktree: \"auth-refactor\" }\n.tasks/task_2.json -> { status: \"in_progress\", worktree: \"ui-login\" }\n.worktrees/index.json updated",
"annotation": "Control-plane state remains canonical; hook-style consumers can react to lifecycle events without owning canonical state writes"
},
{
"type": "tool_call",
"content": "worktree_run(name: \"auth-refactor\", command: \"pytest tests/auth -q\")\nworktree_run(name: \"ui-login\", command: \"npm test -- login\")",
"toolName": "worktree_run",
"annotation": "In this teaching runtime, commands route by lane-scoped cwd; other runtimes may use session-level directory switches. The invariant is explicit execution context."
},
{
"type": "tool_call",
"content": "worktree_keep(name: \"ui-login\")\nworktree_remove(name: \"auth-refactor\", complete_task: true)\nworktree_events(limit: 10)",
"toolName": "worktree_manager",
"annotation": "Closeout is explicit tool-driven state transition: mix keep/remove decisions and query lifecycle events in one pass"
},
{
"type": "system_event",
"content": "worktree.keep emitted for ui-login\nworktree.remove.before/after emitted for auth-refactor\ntask.completed emitted for #1\n.worktrees/events.jsonl appended",
"annotation": "Lifecycle transitions become explicit records while task/worktree files remain source-of-truth"
},
{
"type": "assistant_text",
"content": "Task board handles coordination, worktrees handle isolation. Parallel tracks stay clean and auditable.",
"annotation": "Coordinate in one board, isolate by lane only where needed, and run optional policy/audit side effects from lifecycle events"
}
]
}
================================================
FILE: web/src/hooks/useDarkMode.ts
================================================
"use client";
import { useState, useEffect } from "react";
/**
 * Tracks whether the root <html> element currently carries the "dark" class.
 * Re-renders whenever the class attribute changes (e.g. a theme toggle).
 */
export function useDarkMode(): boolean {
  const [dark, setDark] = useState(false);
  useEffect(() => {
    const root = document.documentElement;
    const sync = () => setDark(root.classList.contains("dark"));
    // Initialize from the current DOM state, then watch for class changes.
    sync();
    const watcher = new MutationObserver(sync);
    watcher.observe(root, { attributes: true, attributeFilter: ["class"] });
    return () => watcher.disconnect();
  }, []);
  return dark;
}
/** Color tokens for theme-aware inline SVG diagrams (light/dark variants). */
export interface SvgPalette {
  nodeFill: string;
  nodeStroke: string;
  nodeText: string;
  activeNodeFill: string; // highlight fill for the currently-active node
  activeNodeStroke: string;
  activeNodeText: string;
  endNodeFill: string; // accent for terminal/end nodes
  endNodeStroke: string;
  edgeStroke: string;
  activeEdgeStroke: string; // highlight stroke for the active edge
  arrowFill: string; // arrowhead marker color
  labelFill: string; // edge/label text color
  bgSubtle: string; // subtle panel background
}
/**
 * Returns the SVG color palette matching the current theme.
 * Re-renders (via useDarkMode) when the theme class changes.
 */
export function useSvgPalette(): SvgPalette {
  const darkPalette: SvgPalette = {
    nodeFill: "#27272a",
    nodeStroke: "#3f3f46",
    nodeText: "#d4d4d8",
    activeNodeFill: "#3b82f6",
    activeNodeStroke: "#2563eb",
    activeNodeText: "#ffffff",
    endNodeFill: "#a855f7",
    endNodeStroke: "#9333ea",
    edgeStroke: "#52525b",
    activeEdgeStroke: "#3b82f6",
    arrowFill: "#71717a",
    labelFill: "#a1a1aa",
    bgSubtle: "#18181b",
  };
  const lightPalette: SvgPalette = {
    nodeFill: "#e2e8f0",
    nodeStroke: "#cbd5e1",
    nodeText: "#475569",
    activeNodeFill: "#3b82f6",
    activeNodeStroke: "#2563eb",
    activeNodeText: "#ffffff",
    endNodeFill: "#a855f7",
    endNodeStroke: "#9333ea",
    edgeStroke: "#cbd5e1",
    activeEdgeStroke: "#3b82f6",
    arrowFill: "#94a3b8",
    labelFill: "#94a3b8",
    bgSubtle: "#f8fafc",
  };
  return useDarkMode() ? darkPalette : lightPalette;
}
================================================
FILE: web/src/hooks/useSimulator.ts
================================================
"use client";
import { useState, useCallback, useRef, useEffect } from "react";
import type { SimStep } from "@/types/agent-data";
/** Internal playback state for the transcript simulator. */
interface SimulatorState {
  currentIndex: number; // index of the last visible step; -1 = nothing shown yet
  isPlaying: boolean;
  speed: number; // playback multiplier (1x = 1200ms per step)
}
/**
 * Drives step-by-step playback of a scripted agent transcript.
 * Exposes play/pause/step/reset controls plus the currently visible steps.
 */
export function useSimulator(steps: SimStep[]) {
  const [state, setState] = useState<SimulatorState>({
    currentIndex: -1,
    isPlaying: false,
    speed: 1,
  });
  // Pending auto-advance timer; null when no step is scheduled.
  // (Original annotation was garbled in extraction; restored here.)
  const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
  const clearTimer = useCallback(() => {
    if (timerRef.current) {
      clearTimeout(timerRef.current);
      timerRef.current = null;
    }
  }, []);
  // Advance one step; stop playback once the last step is visible.
  const stepForward = useCallback(() => {
    setState((prev) => {
      if (prev.currentIndex >= steps.length - 1) {
        return { ...prev, isPlaying: false };
      }
      return { ...prev, currentIndex: prev.currentIndex + 1 };
    });
  }, [steps.length]);
  const play = useCallback(() => {
    setState((prev) => {
      if (prev.currentIndex >= steps.length - 1) {
        return prev; // nothing left to play
      }
      return { ...prev, isPlaying: true };
    });
  }, [steps.length]);
  const pause = useCallback(() => {
    clearTimer();
    setState((prev) => ({ ...prev, isPlaying: false }));
  }, [clearTimer]);
  const reset = useCallback(() => {
    clearTimer();
    // Preserve speed via the functional updater rather than closing over
    // state.speed; keeps this callback referentially stable across renders.
    setState((prev) => ({ ...prev, currentIndex: -1, isPlaying: false }));
  }, [clearTimer]);
  const setSpeed = useCallback((speed: number) => {
    setState((prev) => ({ ...prev, speed }));
  }, []);
  // Schedule the next auto-advance whenever playback is active; the cleanup
  // cancels any pending timer when deps change or the component unmounts.
  useEffect(() => {
    if (state.isPlaying && state.currentIndex < steps.length - 1) {
      const delay = 1200 / state.speed;
      timerRef.current = setTimeout(() => {
        stepForward();
      }, delay);
    } else if (state.isPlaying && state.currentIndex >= steps.length - 1) {
      setState((prev) => ({ ...prev, isPlaying: false }));
    }
    return () => clearTimer();
  }, [state.isPlaying, state.currentIndex, state.speed, steps.length, stepForward, clearTimer]);
  return {
    currentIndex: state.currentIndex,
    isPlaying: state.isPlaying,
    speed: state.speed,
    visibleSteps: steps.slice(0, state.currentIndex + 1),
    totalSteps: steps.length,
    isComplete: state.currentIndex >= steps.length - 1,
    play,
    pause,
    stepForward,
    reset,
    setSpeed,
  };
}
================================================
FILE: web/src/hooks/useSteppedVisualization.ts
================================================
"use client";
import { useState, useCallback, useEffect, useRef } from "react";
/** Options for a stepped (manual or auto-playing) visualization. */
interface SteppedVisualizationOptions {
  totalSteps: number;
  autoPlayInterval?: number; // ms, default 2000
}
/** Control surface returned by useSteppedVisualization. */
interface SteppedVisualizationReturn {
  currentStep: number;
  totalSteps: number;
  next: () => void;
  prev: () => void;
  reset: () => void;
  goToStep: (step: number) => void;
  isPlaying: boolean;
  toggleAutoPlay: () => void;
  isFirstStep: boolean;
  isLastStep: boolean;
}
/**
 * Step counter with clamped navigation and optional auto-play.
 * Auto-play stops itself on reaching the final step.
 */
export function useSteppedVisualization({
  totalSteps,
  autoPlayInterval = 2000,
}: SteppedVisualizationOptions): SteppedVisualizationReturn {
  const [currentStep, setCurrentStep] = useState(0);
  const [isPlaying, setIsPlaying] = useState(false);
  // Active auto-play interval; null when auto-play is off.
  // (Original type annotation was garbled in extraction; restored here.)
  const intervalRef = useRef<ReturnType<typeof setInterval> | null>(null);
  const next = useCallback(() => {
    setCurrentStep((prev) => Math.min(prev + 1, totalSteps - 1));
  }, [totalSteps]);
  const prev = useCallback(() => {
    setCurrentStep((prev) => Math.max(prev - 1, 0));
  }, []);
  const reset = useCallback(() => {
    setCurrentStep(0);
    setIsPlaying(false);
  }, []);
  // Jump to an arbitrary step, clamped to [0, totalSteps - 1].
  const goToStep = useCallback(
    (step: number) => {
      setCurrentStep(Math.max(0, Math.min(step, totalSteps - 1)));
    },
    [totalSteps]
  );
  const toggleAutoPlay = useCallback(() => {
    setIsPlaying((prev) => !prev);
  }, []);
  useEffect(() => {
    if (isPlaying) {
      intervalRef.current = setInterval(() => {
        setCurrentStep((prev) => {
          if (prev >= totalSteps - 1) {
            // NOTE(review): setIsPlaying inside an updater is a side effect;
            // idempotent here, but consider moving the stop-check outside.
            setIsPlaying(false);
            return prev;
          }
          return prev + 1;
        });
      }, autoPlayInterval);
    }
    return () => {
      if (intervalRef.current) clearInterval(intervalRef.current);
    };
  }, [isPlaying, totalSteps, autoPlayInterval]);
  return {
    currentStep,
    totalSteps,
    next,
    prev,
    reset,
    goToStep,
    isPlaying,
    toggleAutoPlay,
    isFirstStep: currentStep === 0,
    isLastStep: currentStep === totalSteps - 1,
  };
}
================================================
FILE: web/src/i18n/messages/en.json
================================================
{
"meta": { "title": "Learn Claude Code", "description": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time" },
"nav": { "home": "Home", "timeline": "Timeline", "compare": "Compare", "layers": "Layers", "github": "GitHub" },
"home": { "hero_title": "Learn Claude Code", "hero_subtitle": "Build a nano Claude Code-like agent from 0 to 1, one mechanism at a time", "start": "Start Learning", "core_pattern": "The Core Pattern", "core_pattern_desc": "Every AI coding agent shares the same loop: call the model, execute tools, feed results back. Production systems add policy, permissions, and lifecycle layers on top.", "learning_path": "Learning Path", "learning_path_desc": "12 progressive sessions, from a simple loop to isolated autonomous execution", "layers_title": "Architectural Layers", "layers_desc": "Five orthogonal concerns that compose into a complete agent", "loc": "LOC", "learn_more": "Learn More", "versions_in_layer": "versions", "message_flow": "Message Growth", "message_flow_desc": "Watch the messages array grow as the agent loop executes" },
"version": { "loc": "lines of code", "tools": "tools", "new": "New", "prev": "Previous", "next": "Next", "view_source": "View Source", "view_diff": "View Diff", "design_decisions": "Design Decisions", "whats_new": "What's New", "tutorial": "Tutorial", "simulator": "Agent Loop Simulator", "execution_flow": "Execution Flow", "architecture": "Architecture", "concept_viz": "Concept Visualization", "alternatives": "Alternatives Considered", "tab_learn": "Learn", "tab_simulate": "Simulate", "tab_code": "Code", "tab_deep_dive": "Deep Dive" },
"sim": { "play": "Play", "pause": "Pause", "step": "Step", "reset": "Reset", "speed": "Speed", "step_of": "of" },
"timeline": { "title": "Learning Path", "subtitle": "s01 to s12: Progressive Agent Design", "layer_legend": "Layer Legend", "loc_growth": "LOC Growth", "learn_more": "Learn More" },
"layers": {
"title": "Architectural Layers",
"subtitle": "Five orthogonal concerns that compose into a complete agent",
"tools": "What the agent CAN do. The foundation: tools give the model capabilities to interact with the world.",
"planning": "How work is organized. From simple todo lists to dependency-aware task boards shared across agents.",
"memory": "Keeping context within limits. Compression strategies that let agents work infinitely without losing coherence.",
"concurrency": "Non-blocking execution. Background threads and notification buses for parallel work.",
"collaboration": "Multi-agent coordination. Teams, messaging, and autonomous teammates that think for themselves."
},
"compare": {
"title": "Compare Versions",
"subtitle": "See what changed between any two versions",
"select_a": "Version A",
"select_b": "Version B",
"loc_delta": "LOC Delta",
"lines": "lines",
"new_tools_in_b": "New Tools in B",
"new_classes_in_b": "New Classes in B",
"new_functions_in_b": "New Functions in B",
"tool_comparison": "Tool Comparison",
"only_in": "Only in",
"shared": "Shared",
"none": "None",
"source_diff": "Source Code Diff",
"empty_hint": "Select two versions above to compare them.",
"architecture": "Architecture"
},
"diff": {
"new_classes": "New Classes",
"new_tools": "New Tools",
"new_functions": "New Functions",
"loc_delta": "LOC Delta"
},
"sessions": {
"s01": "The Agent Loop",
"s02": "Tools",
"s03": "TodoWrite",
"s04": "Subagents",
"s05": "Skills",
"s06": "Compact",
"s07": "Tasks",
"s08": "Background Tasks",
"s09": "Agent Teams",
"s10": "Team Protocols",
"s11": "Autonomous Agents",
"s12": "Worktree + Task Isolation"
},
"layer_labels": {
"tools": "Tools & Execution",
"planning": "Planning & Coordination",
"memory": "Memory Management",
"concurrency": "Concurrency",
"collaboration": "Collaboration"
},
"viz": {
"s01": "The Agent While-Loop",
"s02": "Tool Dispatch Map",
"s03": "TodoWrite Nag System",
"s04": "Subagent Context Isolation",
"s05": "On-Demand Skill Loading",
"s06": "Three-Layer Context Compression",
"s07": "Task Dependency Graph",
"s08": "Background Task Lanes",
"s09": "Agent Team Mailboxes",
"s10": "FSM Team Protocols",
"s11": "Autonomous Agent Cycle",
"s12": "Worktree Task Isolation"
}
}
================================================
FILE: web/src/i18n/messages/ja.json
================================================
{
"meta": { "title": "Learn Claude Code", "description": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加" },
"nav": { "home": "ホーム", "timeline": "学習パス", "compare": "バージョン比較", "layers": "アーキテクチャ層", "github": "GitHub" },
"home": { "hero_title": "Learn Claude Code", "hero_subtitle": "0 から 1 へ nano Claude Code-like agent を構築し、毎回 1 つの仕組みを追加", "start": "学習を始める", "core_pattern": "コアパターン", "core_pattern_desc": "すべての AI コーディングエージェントは同じループを共有する:モデルを呼び出し、ツールを実行し、結果を返す。実運用ではこの上にポリシー、権限、ライフサイクル層が重なる。", "learning_path": "学習パス", "learning_path_desc": "12の段階的セッション、シンプルなループから分離された自律実行まで", "layers_title": "アーキテクチャ層", "layers_desc": "5つの直交する関心事が完全なエージェントを構成", "loc": "行", "learn_more": "詳細を見る", "versions_in_layer": "バージョン", "message_flow": "メッセージの増加", "message_flow_desc": "エージェントループ実行時のメッセージ配列の成長を観察" },
"version": { "loc": "行のコード", "tools": "ツール", "new": "新規", "prev": "前のバージョン", "next": "次のバージョン", "view_source": "ソースを見る", "view_diff": "差分を見る", "design_decisions": "設計判断", "whats_new": "新機能", "tutorial": "チュートリアル", "simulator": "エージェントループシミュレーター", "execution_flow": "実行フロー", "architecture": "アーキテクチャ", "concept_viz": "コンセプト可視化", "alternatives": "検討された代替案", "tab_learn": "学習", "tab_simulate": "シミュレーション", "tab_code": "ソースコード", "tab_deep_dive": "詳細分析" },
"sim": { "play": "再生", "pause": "一時停止", "step": "ステップ", "reset": "リセット", "speed": "速度", "step_of": "/" },
"timeline": { "title": "学習パス", "subtitle": "s01からs12へ:段階的エージェント設計", "layer_legend": "レイヤー凡例", "loc_growth": "コード量の推移", "learn_more": "詳細を見る" },
"layers": {
"title": "アーキテクチャ層",
"subtitle": "5つの直交する関心事が完全なエージェントを構成",
"tools": "エージェントができること。基盤:ツールがモデルに外部世界と対話する能力を与える。",
"planning": "作業の組織化。シンプルなToDoリストからエージェント間で共有される依存関係対応タスクボードまで。",
"memory": "コンテキスト制限内での記憶保持。圧縮戦略によりエージェントが一貫性を失わずに無限に作業可能。",
"concurrency": "ノンブロッキング実行。バックグラウンドスレッドと通知バスによる並列作業。",
"collaboration": "マルチエージェント連携。チーム、メッセージング、自律的に考えるチームメイト。"
},
"compare": {
"title": "バージョン比較",
"subtitle": "任意の2つのバージョン間の変更を確認",
"select_a": "バージョンA",
"select_b": "バージョンB",
"loc_delta": "コード量の差分",
"lines": "行",
"new_tools_in_b": "Bの新規ツール",
"new_classes_in_b": "Bの新規クラス",
"new_functions_in_b": "Bの新規関数",
"tool_comparison": "ツール比較",
"only_in": "のみ",
"shared": "共通",
"none": "なし",
"source_diff": "ソースコード差分",
"empty_hint": "上で2つのバージョンを選択して比較してください。",
"architecture": "アーキテクチャ"
},
"diff": {
"new_classes": "新規クラス",
"new_tools": "新規ツール",
"new_functions": "新規関数",
"loc_delta": "コード量の差分"
},
"sessions": {
"s01": "エージェントループ",
"s02": "ツール",
"s03": "TodoWrite",
"s04": "サブエージェント",
"s05": "スキル",
"s06": "コンテキスト圧縮",
"s07": "タスクシステム",
"s08": "バックグラウンドタスク",
"s09": "エージェントチーム",
"s10": "チームプロトコル",
"s11": "自律エージェント",
"s12": "Worktree + タスク分離"
},
"layer_labels": {
"tools": "ツールと実行",
"planning": "計画と調整",
"memory": "メモリ管理",
"concurrency": "並行処理",
"collaboration": "コラボレーション"
},
"viz": {
"s01": "エージェント Whileループ",
"s02": "ツールディスパッチマップ",
"s03": "TodoWrite リマインドシステム",
"s04": "サブエージェント コンテキスト分離",
"s05": "オンデマンド スキルローディング",
"s06": "3層コンテキスト圧縮",
"s07": "タスク依存関係グラフ",
"s08": "バックグラウンドタスクレーン",
"s09": "エージェントチーム メールボックス",
"s10": "FSM チームプロトコル",
"s11": "自律エージェントサイクル",
"s12": "Worktree タスク分離"
}
}
================================================
FILE: web/src/i18n/messages/zh.json
================================================
{
"meta": { "title": "Learn Claude Code", "description": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制" },
"nav": { "home": "首页", "timeline": "学习路径", "compare": "版本对比", "layers": "架构层", "github": "GitHub" },
"home": { "hero_title": "Learn Claude Code", "hero_subtitle": "从 0 到 1 构建 nano Claude Code-like agent,每次只加一个机制", "start": "开始学习", "core_pattern": "核心模式", "core_pattern_desc": "所有 AI 编程 Agent 共享同一个循环:调用模型、执行工具、回传结果。生产级系统会在其上叠加策略、权限和生命周期层。", "learning_path": "学习路径", "learning_path_desc": "12 个渐进式课程,从简单循环到隔离化自治执行", "layers_title": "架构层次", "layers_desc": "五个正交关注点组合成完整的 Agent", "loc": "行", "learn_more": "了解更多", "versions_in_layer": "个版本", "message_flow": "消息增长", "message_flow_desc": "观察 Agent 循环执行时消息数组的增长" },
"version": { "loc": "行代码", "tools": "个工具", "new": "新增", "prev": "上一版", "next": "下一版", "view_source": "查看源码", "view_diff": "查看变更", "design_decisions": "设计决策", "whats_new": "新增内容", "tutorial": "教程", "simulator": "Agent 循环模拟器", "execution_flow": "执行流程", "architecture": "架构", "concept_viz": "概念可视化", "alternatives": "替代方案", "tab_learn": "学习", "tab_simulate": "模拟", "tab_code": "源码", "tab_deep_dive": "深入探索" },
"sim": { "play": "播放", "pause": "暂停", "step": "单步", "reset": "重置", "speed": "速度", "step_of": "/" },
"timeline": { "title": "学习路径", "subtitle": "s01 到 s12:渐进式 Agent 设计", "layer_legend": "层次图例", "loc_growth": "代码量增长", "learn_more": "了解更多" },
"layers": {
"title": "架构层次",
"subtitle": "五个正交关注点组合成完整的 Agent",
"tools": "Agent 能做什么。基础层:工具赋予模型与外部世界交互的能力。",
"planning": "如何组织工作。从简单的待办列表到跨 Agent 共享的依赖感知任务板。",
"memory": "在上下文限制内保持记忆。压缩策略让 Agent 可以无限工作而不失去连贯性。",
"concurrency": "非阻塞执行。后台线程和通知总线实现并行工作。",
"collaboration": "多 Agent 协作。团队、消息传递和能独立思考的自主队友。"
},
"compare": {
"title": "版本对比",
"subtitle": "查看任意两个版本之间的变化",
"select_a": "版本 A",
"select_b": "版本 B",
"loc_delta": "代码量差异",
"lines": "行",
"new_tools_in_b": "B 中新增工具",
"new_classes_in_b": "B 中新增类",
"new_functions_in_b": "B 中新增函数",
"tool_comparison": "工具对比",
"only_in": "仅在",
"shared": "共有",
"none": "无",
"source_diff": "源码差异",
"empty_hint": "请在上方选择两个版本进行对比。",
"architecture": "架构"
},
"diff": {
"new_classes": "新增类",
"new_tools": "新增工具",
"new_functions": "新增函数",
"loc_delta": "代码量差异"
},
"sessions": {
"s01": "Agent 循环",
"s02": "工具",
"s03": "TodoWrite",
"s04": "子 Agent",
"s05": "技能",
"s06": "上下文压缩",
"s07": "任务系统",
"s08": "后台任务",
"s09": "Agent 团队",
"s10": "团队协议",
"s11": "自主 Agent",
"s12": "Worktree + 任务隔离"
},
"layer_labels": {
"tools": "工具与执行",
"planning": "规划与协调",
"memory": "内存管理",
"concurrency": "并发",
"collaboration": "协作"
},
"viz": {
"s01": "Agent While 循环",
"s02": "工具分发映射",
"s03": "TodoWrite 提醒系统",
"s04": "子 Agent 上下文隔离",
"s05": "按需技能加载",
"s06": "三层上下文压缩",
"s07": "任务依赖图",
"s08": "后台任务通道",
"s09": "Agent 团队邮箱",
"s10": "FSM 团队协议",
"s11": "自主 Agent 循环",
"s12": "Worktree 任务隔离"
}
}
================================================
FILE: web/src/lib/constants.ts
================================================
// Canonical ordering of the teaching sessions (s01 → s12).
export const VERSION_ORDER = [
  "s01", "s02", "s03", "s04", "s05", "s06", "s07", "s08", "s09", "s10", "s11", "s12"
] as const;
// The learning path follows version order exactly.
export const LEARNING_PATH = VERSION_ORDER;
export type VersionId = typeof LEARNING_PATH[number];
/** Static metadata describing one agent version. */
export interface VersionMeta {
  title: string;
  subtitle: string;
  coreAddition: string;
  keyInsight: string;
  layer: "tools" | "planning" | "memory" | "concurrency" | "collaboration";
  prevVersion: VersionId | null; // null only for the first version (s01)
}
// Type parameters were garbled in extraction; restored so every VersionId
// key is required and each entry is shape-checked.
export const VERSION_META: Record<VersionId, VersionMeta> = {
  s01: { title: "The Agent Loop", subtitle: "Bash is All You Need", coreAddition: "Single-tool agent loop", keyInsight: "The minimal agent kernel is a while loop + one tool", layer: "tools", prevVersion: null },
  s02: { title: "Tools", subtitle: "One Handler Per Tool", coreAddition: "Tool dispatch map", keyInsight: "The loop stays the same; new tools register into the dispatch map", layer: "tools", prevVersion: "s01" },
  s03: { title: "TodoWrite", subtitle: "Plan Before You Act", coreAddition: "TodoManager + nag reminder", keyInsight: "An agent without a plan drifts; list the steps first, then execute", layer: "planning", prevVersion: "s02" },
  s04: { title: "Subagents", subtitle: "Clean Context Per Subtask", coreAddition: "Subagent spawn with isolated messages[]", keyInsight: "Subagents use independent messages[], keeping the main conversation clean", layer: "planning", prevVersion: "s03" },
  s05: { title: "Skills", subtitle: "Load on Demand", coreAddition: "SkillLoader + two-layer injection", keyInsight: "Inject knowledge via tool_result when needed, not upfront in the system prompt", layer: "planning", prevVersion: "s04" },
  s06: { title: "Compact", subtitle: "Three-Layer Compression", coreAddition: "micro-compact + auto-compact + archival", keyInsight: "Context will fill up; three-layer compression strategy enables infinite sessions", layer: "memory", prevVersion: "s05" },
  s07: { title: "Tasks", subtitle: "Task Graph + Dependencies", coreAddition: "TaskManager with file-based state + dependency graph", keyInsight: "A file-based task graph with ordering, parallelism, and dependencies -- the coordination backbone for multi-agent work", layer: "planning", prevVersion: "s06" },
  s08: { title: "Background Tasks", subtitle: "Background Threads + Notifications", coreAddition: "BackgroundManager + notification queue", keyInsight: "Run slow operations in the background; the agent keeps thinking ahead", layer: "concurrency", prevVersion: "s07" },
  s09: { title: "Agent Teams", subtitle: "Teammates + Mailboxes", coreAddition: "TeammateManager + file-based mailbox", keyInsight: "When one agent can't finish, delegate to persistent teammates via async mailboxes", layer: "collaboration", prevVersion: "s08" },
  s10: { title: "Team Protocols", subtitle: "Shared Communication Rules", coreAddition: "request_id correlation for two protocols", keyInsight: "One request-response pattern drives all team negotiation", layer: "collaboration", prevVersion: "s09" },
  s11: { title: "Autonomous Agents", subtitle: "Scan Board, Claim Tasks", coreAddition: "Task board polling + timeout-based self-governance", keyInsight: "Teammates scan the board and claim tasks themselves; no need for the lead to assign each one", layer: "collaboration", prevVersion: "s10" },
  s12: { title: "Worktree + Task Isolation", subtitle: "Isolate by Directory", coreAddition: "Composable worktree lifecycle + event stream over a shared task board", keyInsight: "Each works in its own directory; tasks manage goals, worktrees manage directories, bound by ID", layer: "collaboration", prevVersion: "s11" },
};
// Architectural layers and the versions belonging to each.
// `color` is a hex accent for the layer (presumably used by the timeline
// legend UI -- confirm against components).
export const LAYERS = [
  { id: "tools" as const, label: "Tools & Execution", color: "#3B82F6", versions: ["s01", "s02"] },
  { id: "planning" as const, label: "Planning & Coordination", color: "#10B981", versions: ["s03", "s04", "s05", "s07"] },
  { id: "memory" as const, label: "Memory Management", color: "#8B5CF6", versions: ["s06"] },
  { id: "concurrency" as const, label: "Concurrency", color: "#F59E0B", versions: ["s08"] },
  { id: "collaboration" as const, label: "Collaboration", color: "#EF4444", versions: ["s09", "s10", "s11", "s12"] },
] as const;
================================================
FILE: web/src/lib/i18n-server.ts
================================================
import en from "@/i18n/messages/en.json";
import zh from "@/i18n/messages/zh.json";
import ja from "@/i18n/messages/ja.json";
type Messages = typeof en;
const messagesMap: Record = { en, zh, ja };
export function getTranslations(locale: string, namespace: string) {
const messages = messagesMap[locale] || en;
const ns = (messages as Record>)[namespace];
const fallbackNs = (en as Record>)[namespace];
return (key: string): string => {
return ns?.[key] || fallbackNs?.[key] || key;
};
}
================================================
FILE: web/src/lib/i18n.tsx
================================================
"use client";
import { createContext, useContext, ReactNode } from "react";
import en from "@/i18n/messages/en.json";
import zh from "@/i18n/messages/zh.json";
import ja from "@/i18n/messages/ja.json";
type Messages = typeof en;
// Locale → message bundle; unknown locales fall back to English.
// (Record type parameters were garbled in extraction; restored.)
const messagesMap: Record<string, Messages> = { en, zh, ja };
// Default context value lets components render sensibly outside a provider.
const I18nContext = createContext<{ locale: string; messages: Messages }>({
  locale: "en",
  messages: en,
});
/**
 * Provides the resolved locale + message bundle to descendant components.
 * Unknown locales fall back to English.
 * (The JSX provider wrapper was stripped in extraction; restored here.)
 */
export function I18nProvider({ locale, children }: { locale: string; children: ReactNode }) {
  const messages = messagesMap[locale] ?? en;
  return (
    <I18nContext.Provider value={{ locale, messages }}>{children}</I18nContext.Provider>
  );
}
/**
 * Client-side translator hook.
 * Returns `t(key)` scoped to `namespace` (or the whole bundle when omitted);
 * unknown keys echo the key itself.
 */
export function useTranslations(namespace?: string) {
  const { messages } = useContext(I18nContext);
  return (key: string) => {
    // Typed record lookups instead of `as any` casts.
    const table = namespace
      ? (messages as Record<string, unknown>)[namespace]
      : messages;
    if (!table) return key;
    return (table as Record<string, string>)[key] || key;
  };
}
/** Returns the active locale string from the i18n context. */
export function useLocale() {
  const { locale } = useContext(I18nContext);
  return locale;
}
================================================
FILE: web/src/lib/utils.ts
================================================
/** Joins the truthy class names with single spaces (falsy entries dropped). */
export function cn(...classes: (string | undefined | null | false)[]) {
  const present: string[] = [];
  for (const cls of classes) {
    if (cls) present.push(cls);
  }
  return present.join(" ");
}
================================================
FILE: web/src/types/agent-data.ts
================================================
/** Parsed metadata + source for one teaching-agent version (s01..s12). */
export interface AgentVersion {
  id: string; // version id, e.g. "s07"
  filename: string;
  title: string;
  subtitle: string;
  loc: number; // lines of code
  tools: string[]; // all tool names available in this version
  newTools: string[]; // tools introduced by this version
  coreAddition: string;
  keyInsight: string;
  classes: { name: string; startLine: number; endLine: number }[];
  functions: { name: string; signature: string; startLine: number }[];
  layer: "tools" | "planning" | "memory" | "concurrency" | "collaboration";
  source: string; // full source text of the version
}
/** Computed delta between two versions (consumed by the compare view). */
export interface VersionDiff {
  from: string;
  to: string;
  newClasses: string[];
  newFunctions: string[];
  newTools: string[];
  locDelta: number; // presumably to.loc - from.loc -- confirm in generator
}
/** Localized tutorial document for a version. */
export interface DocContent {
  version: string;
  locale: "en" | "zh" | "ja";
  title: string;
  content: string; // raw markdown
}
/** Top-level index of all versions and their pairwise diffs. */
export interface VersionIndex {
  versions: AgentVersion[];
  diffs: VersionDiff[];
}
/** Discriminator for one entry in a simulated agent transcript. */
export type SimStepType =
  | "user_message"
  | "assistant_text"
  | "tool_call"
  | "tool_result"
  | "system_event";
/** One entry in a scripted simulation transcript. */
export interface SimStep {
  type: SimStepType;
  content: string;
  annotation: string; // teaching note shown alongside the step
  toolName?: string; // set when the step involves a tool
  toolInput?: string;
}
/** A complete scripted simulation for one version. */
export interface Scenario {
  version: string;
  title: string;
  description: string;
  steps: SimStep[];
}
/** Node in an execution-flow diagram; x/y are drawing coordinates. */
export interface FlowNode {
  id: string;
  label: string;
  type: "start" | "process" | "decision" | "subprocess" | "end";
  x: number;
  y: number;
}
/** Directed edge between two FlowNode ids, with an optional label. */
export interface FlowEdge {
  from: string;
  to: string;
  label?: string;
}
================================================
FILE: web/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2018",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "react-jsx",
"incremental": true,
"plugins": [
{
"name": "next"
}
],
"paths": {
"@/*": ["./src/*"]
}
},
"include": [
"next-env.d.ts",
"**/*.ts",
"**/*.tsx",
".next/types/**/*.ts",
".next/dev/types/**/*.ts",
"**/*.mts"
],
"exclude": ["node_modules"]
}
================================================
FILE: web/vercel.json
================================================
{
"redirects": [
{
"source": "/:path(.*)",
"has": [
{
"type": "host",
"value": "learn-claude-agents.vercel.app"
}
],
"destination": "https://learn.shareai.run/:path",
"permanent": true
},
{
"source": "/",
"destination": "/en",
"permanent": false
}
]
}