Repository: humanlayer/12-factor-agents
Branch: main
Commit: d20c728368bf
Files: 429
Total size: 1.0 MB

Directory structure:
gitextract_8e8ds6gd/

├── .gitignore
├── CLAUDE.md
├── LICENSE
├── Makefile
├── README.md
├── content/
│   ├── appendix-13-pre-fetch.md
│   ├── brief-history-of-software.md
│   ├── factor-01-natural-language-to-tool-calls.md
│   ├── factor-02-own-your-prompts.md
│   ├── factor-03-own-your-context-window.md
│   ├── factor-04-tools-are-structured-outputs.md
│   ├── factor-05-unify-execution-state.md
│   ├── factor-06-launch-pause-resume.md
│   ├── factor-07-contact-humans-with-tools.md
│   ├── factor-08-own-your-control-flow.md
│   ├── factor-09-compact-errors.md
│   ├── factor-1-natural-language-to-tool-calls.md
│   ├── factor-10-small-focused-agents.md
│   ├── factor-11-trigger-from-anywhere.md
│   ├── factor-12-stateless-reducer.md
│   ├── factor-2-own-your-prompts.md
│   ├── factor-3-own-your-context-window.md
│   ├── factor-4-tools-are-structured-outputs.md
│   ├── factor-5-unify-execution-state.md
│   ├── factor-6-launch-pause-resume.md
│   ├── factor-7-contact-humans-with-tools.md
│   ├── factor-8-own-your-control-flow.md
│   └── factor-9-compact-errors.md
├── drafts/
│   ├── a2h-spec.md
│   └── ah2-openapi.json
├── hack/
│   └── contributors_markdown/
│       ├── .python-version
│       ├── README.md
│       ├── contributors_markdown.py
│       └── pyproject.toml
├── packages/
│   ├── create-12-factor-agent/
│   │   └── template/
│   │       ├── .gitignore
│   │       ├── README.md
│   │       ├── baml_src/
│   │       │   ├── agent.baml
│   │       │   ├── clients.baml
│   │       │   ├── generators.baml
│   │       │   └── tool_calculator.baml
│   │       ├── package.json
│   │       ├── src/
│   │       │   ├── a2h.ts
│   │       │   ├── agent.ts
│   │       │   ├── cli.ts
│   │       │   ├── index.ts
│   │       │   ├── server.ts
│   │       │   └── state.ts
│   │       └── tsconfig.json
│   └── walkthroughgen/
│       ├── .gitignore
│       ├── examples/
│       │   ├── typescript/
│       │   │   ├── .gitignore
│       │   │   ├── walkthrough/
│       │   │   │   ├── 00-package-lock.json
│       │   │   │   ├── 00-package.json
│       │   │   │   ├── 00-tsconfig.json
│       │   │   │   ├── 01-index.ts
│       │   │   │   ├── 02-cli.ts
│       │   │   │   └── 02-index.ts
│       │   │   └── walkthrough.yaml
│       │   └── walkthroughgen/
│       │       └── walkthrough.yaml
│       ├── jest.config.js
│       ├── package.json
│       ├── prompt.md
│       ├── readme.md
│       ├── src/
│       │   ├── cli.ts
│       │   └── index.ts
│       ├── test/
│       │   ├── e2e/
│       │   │   └── test-e2e.ts
│       │   └── utils/
│       │       ├── console-mock.ts
│       │       └── temp-dir.ts
│       └── tsconfig.json
└── workshops/
    ├── .gitignore
    ├── .python-version
    ├── 2025-05/
    │   ├── .gitignore
    │   ├── Makefile
    │   ├── final/
    │   │   ├── .gitignore
    │   │   ├── baml_src/
    │   │   │   ├── agent.baml
    │   │   │   ├── clients.baml
    │   │   │   ├── generators.baml
    │   │   │   └── tool_calculator.baml
    │   │   ├── package.json
    │   │   ├── src/
    │   │   │   ├── agent.ts
    │   │   │   ├── cli.ts
    │   │   │   ├── index.ts
    │   │   │   ├── server.ts
    │   │   │   └── state.ts
    │   │   └── tsconfig.json
    │   ├── sections/
    │   │   ├── 00-hello-world/
    │   │   │   ├── README.md
    │   │   │   └── walkthrough/
    │   │   │       ├── 00-.gitignore
    │   │   │       ├── 00-index.ts
    │   │   │       ├── 00-package.json
    │   │   │       └── 00-tsconfig.json
    │   │   ├── 01-cli-and-agent/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   └── index.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 01-agent.baml
    │   │   │       ├── 01-agent.ts
    │   │   │       ├── 01-cli.ts
    │   │   │       └── 01-index.ts
    │   │   ├── 02-calculator-tools/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   └── generators.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   └── index.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 02-agent.baml
    │   │   │       └── 02-tool_calculator.baml
    │   │   ├── 03-tool-loop/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   └── index.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 03-agent.ts
    │   │   │       └── 03b-agent.ts
    │   │   ├── 04-baml-tests/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   └── index.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 04-agent.baml
    │   │   │       ├── 04b-agent.baml
    │   │   │       └── 04c-agent.baml
    │   │   ├── 05-human-tools/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   └── index.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 05-agent.baml
    │   │   │       ├── 05-agent.ts
    │   │   │       ├── 05-cli.ts
    │   │   │       ├── 05b-agent.baml
    │   │   │       └── 05c-agent.baml
    │   │   ├── 06-customize-prompt/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   └── index.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       └── 06-agent.baml
    │   │   ├── 07-context-window/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   └── index.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 07-agent.ts
    │   │   │       ├── 07b-agent.ts
    │   │   │       └── 07c-agent.baml
    │   │   ├── 08-api-endpoints/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   └── index.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       └── 08-server.ts
    │   │   ├── 09-state-management/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   ├── index.ts
    │   │   │   │   └── server.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 09-server.ts
    │   │   │       └── 09-state.ts
    │   │   ├── 10-human-approval/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   ├── index.ts
    │   │   │   │   ├── server.ts
    │   │   │   │   └── state.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 10-agent.ts
    │   │   │       └── 10-server.ts
    │   │   ├── 11-humanlayer-approval/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   ├── index.ts
    │   │   │   │   ├── server.ts
    │   │   │   │   └── state.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 11-cli.ts
    │   │   │       ├── 11b-cli.ts
    │   │   │       └── 11c-cli.ts
    │   │   ├── 12-humanlayer-webhook/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   ├── generators.baml
    │   │   │   │   └── tool_calculator.baml
    │   │   │   ├── package.json
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   ├── index.ts
    │   │   │   │   ├── server.ts
    │   │   │   │   └── state.ts
    │   │   │   ├── tsconfig.json
    │   │   │   └── walkthrough/
    │   │   │       ├── 12-1-server-init.ts
    │   │   │       └── 12a-server.ts
    │   │   └── final/
    │   │       ├── .gitignore
    │   │       ├── README.md
    │   │       ├── baml_src/
    │   │       │   ├── agent.baml
    │   │       │   ├── clients.baml
    │   │       │   ├── generators.baml
    │   │       │   └── tool_calculator.baml
    │   │       ├── package.json
    │   │       ├── src/
    │   │       │   ├── agent.ts
    │   │       │   ├── cli.ts
    │   │       │   ├── index.ts
    │   │       │   ├── server.ts
    │   │       │   └── state.ts
    │   │       └── tsconfig.json
    │   ├── walkthrough/
    │   │   ├── 00-.gitignore
    │   │   ├── 00-index.ts
    │   │   ├── 00-package.json
    │   │   ├── 00-tsconfig.json
    │   │   ├── 01-agent.baml
    │   │   ├── 01-agent.ts
    │   │   ├── 01-cli.ts
    │   │   ├── 01-index.ts
    │   │   ├── 02-agent.baml
    │   │   ├── 02-tool_calculator.baml
    │   │   ├── 03-agent.ts
    │   │   ├── 03b-agent.ts
    │   │   ├── 04-agent.baml
    │   │   ├── 04b-agent.baml
    │   │   ├── 04c-agent.baml
    │   │   ├── 05-agent.baml
    │   │   ├── 05-agent.ts
    │   │   ├── 05-cli.ts
    │   │   ├── 05b-agent.baml
    │   │   ├── 05c-agent.baml
    │   │   ├── 06-agent.baml
    │   │   ├── 07-agent.ts
    │   │   ├── 07b-agent.ts
    │   │   ├── 07c-agent.baml
    │   │   ├── 08-server.ts
    │   │   ├── 09-server.ts
    │   │   ├── 09-state.ts
    │   │   ├── 10-agent.ts
    │   │   ├── 10-server.ts
    │   │   ├── 11-cli.ts
    │   │   ├── 11b-cli.ts
    │   │   ├── 11c-cli.ts
    │   │   ├── 12-1-server-init.ts
    │   │   ├── 12-server.ts
    │   │   ├── 12a-server.ts
    │   │   ├── 12aa-server.ts
    │   │   └── 12b-server.ts
    │   ├── walkthrough.md
    │   └── walkthrough.yaml
    ├── 2025-05-17/
    │   ├── .gitignore
    │   ├── sections/
    │   │   ├── 00-hello-world/
    │   │   │   ├── README.md
    │   │   │   └── walkthrough/
    │   │   │       ├── 00-.gitignore
    │   │   │       ├── 00-index.ts
    │   │   │       ├── 00-package.json
    │   │   │       └── 00-tsconfig.json
    │   │   ├── 01-cli-and-agent/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── src/
    │   │   │   │   └── index.ts
    │   │   │   └── walkthrough/
    │   │   │       ├── 01-agent.baml
    │   │   │       ├── 01-agent.ts
    │   │   │       ├── 01-cli.ts
    │   │   │       └── 01-index.ts
    │   │   ├── 02-calculator-tools/
    │   │   │   ├── .gitignore
    │   │   │   ├── README.md
    │   │   │   ├── baml_src/
    │   │   │   │   ├── agent.baml
    │   │   │   │   ├── clients.baml
    │   │   │   │   └── generators.baml
    │   │   │   ├── src/
    │   │   │   │   ├── agent.ts
    │   │   │   │   ├── cli.ts
    │   │   │   │   └── index.ts
    │   │   │   └── walkthrough/
    │   │   │       ├── 02-agent.baml
    │   │   │       └── 02-tool_calculator.baml
    │   │   └── 03-tool-loop/
    │   │       ├── .gitignore
    │   │       ├── README.md
    │   │       ├── baml_src/
    │   │       │   ├── agent.baml
    │   │       │   ├── clients.baml
    │   │       │   ├── generators.baml
    │   │       │   └── tool_calculator.baml
    │   │       ├── src/
    │   │       │   ├── agent.ts
    │   │       │   ├── cli.ts
    │   │       │   └── index.ts
    │   │       └── walkthrough/
    │   │           ├── 03-agent.ts
    │   │           └── 03b-agent.ts
    │   ├── walkthrough/
    │   │   ├── 00-.gitignore
    │   │   ├── 00-index.ts
    │   │   ├── 00-package.json
    │   │   ├── 00-tsconfig.json
    │   │   ├── 01-agent.baml
    │   │   ├── 01-agent.ts
    │   │   ├── 01-cli.ts
    │   │   ├── 01-index.ts
    │   │   ├── 02-agent.baml
    │   │   ├── 02-tool_calculator.baml
    │   │   ├── 03-agent.ts
    │   │   ├── 03b-agent.ts
    │   │   ├── 04-agent.baml
    │   │   ├── 04b-agent.baml
    │   │   ├── 04c-agent.baml
    │   │   ├── 05-agent.baml
    │   │   ├── 05-agent.ts
    │   │   ├── 05-cli.ts
    │   │   ├── 05b-agent.baml
    │   │   ├── 05c-agent.baml
    │   │   ├── 06-agent.baml
    │   │   ├── 07-agent.ts
    │   │   ├── 07b-agent.ts
    │   │   ├── 07c-agent.baml
    │   │   ├── 08-server.ts
    │   │   ├── 09-server.ts
    │   │   ├── 09-state.ts
    │   │   ├── 10-agent.ts
    │   │   ├── 10-server.ts
    │   │   ├── 11-cli.ts
    │   │   ├── 11b-cli.ts
    │   │   ├── 11c-cli.ts
    │   │   ├── 12-1-server-init.ts
    │   │   ├── 12-server.ts
    │   │   ├── 12a-server.ts
    │   │   ├── 12aa-server.ts
    │   │   └── 12b-server.ts
    │   ├── walkthrough.md
    │   └── walkthrough.yaml
    └── 2025-07-16/
        ├── .gitignore
        ├── CLAUDE.md
        ├── hack/
        │   ├── analyze_log_capture.py
        │   ├── inspect_notebook.py
        │   ├── minimal_test.ipynb
        │   ├── test_log_capture.sh
        │   └── testing.md
        ├── pyproject.toml
        ├── test_notebook_colab_sim.sh
        ├── walkthrough/
        │   ├── 00-.gitignore
        │   ├── 00-main.py
        │   ├── 00-package.json
        │   ├── 00-tsconfig.json
        │   ├── 01-agent.baml
        │   ├── 01-agent.py
        │   ├── 01-main.py
        │   ├── 02-agent.baml
        │   ├── 02-main.py
        │   ├── 02-tool_calculator.baml
        │   ├── 03-agent.py
        │   ├── 03-main.py
        │   ├── 03b-agent.py
        │   ├── 03b-agent.ts
        │   ├── 04-agent.baml
        │   ├── 04b-agent.baml
        │   ├── 04c-agent.baml
        │   ├── 05-agent.baml
        │   ├── 05-agent.py
        │   ├── 05-main.py
        │   ├── 05b-agent.baml
        │   ├── 05c-agent.baml
        │   ├── 06-agent.baml
        │   ├── 07-agent.py
        │   ├── 07-main.py
        │   ├── 07b-agent.ts
        │   ├── 07c-agent.baml
        │   ├── 08-server.ts
        │   ├── 09-server.ts
        │   ├── 09-state.ts
        │   ├── 10-agent.ts
        │   ├── 10-server.ts
        │   ├── 11-cli.ts
        │   ├── 11b-cli.ts
        │   ├── 11c-cli.ts
        │   ├── 12-1-server-init.ts
        │   ├── 12-server.ts
        │   ├── 12a-server.ts
        │   ├── 12aa-server.ts
        │   └── 12b-server.ts
        ├── walkthrough.yaml
        ├── walkthrough_python_enhanced.yaml
        ├── walkthroughgen_py.py
        └── workshop_final.ipynb

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
.promptx


================================================
FILE: CLAUDE.md
================================================
# AI Assistant Instructions

**IMPORTANT: Copy or merge this file into your project's CLAUDE.md file to activate agent personas.**

## 🚨 MANDATORY PERSONA SELECTION

**CRITICAL: You MUST adopt one of the specialized personas before proceeding with any work.**

**BEFORE DOING ANYTHING ELSE**, you must read and adopt one of these personas:

1. **Developer Agent** - Read `.promptx/personas/agent-developer.md` - For coding, debugging, and implementation tasks
2. **Code Reviewer Agent** - Read `.promptx/personas/agent-code-reviewer.md` - For reviewing code changes and quality assurance
3. **Rebaser Agent** - Read `.promptx/personas/agent-rebaser.md` - For cleaning git history and rebasing changes
4. **Merger Agent** - Read `.promptx/personas/agent-merger.md` - For merging code across branches
5. **Multiplan Manager Agent** - Read `.promptx/personas/agent-multiplan-manager.md` - For orchestrating parallel work and creating plans

**DO NOT PROCEED WITHOUT SELECTING A PERSONA.** Each persona has specific rules, workflows, and tools that you MUST follow exactly.

## How to Choose Your Persona

- **Asked to write code, fix bugs, or implement features?** → Use Developer Agent
- **Asked to review code changes?** → Use Code Reviewer Agent  
- **Asked to clean git history or rebase changes?** → Use Rebaser Agent
- **Asked to merge branches or consolidate work?** → Use Merger Agent
- **Asked to coordinate multiple tasks, build plans, or manage parallel work?** → Use Multiplan Manager Agent

## Project Context

[CUSTOMIZE THIS SECTION FOR YOUR PROJECT]

This project uses:
- **Language/Framework**: [Add your stack here]
- **Build Tool**: [Add your build commands]
- **Testing**: [Add your test commands]  
- **Architecture**: [Describe your project structure]

## Core Principles (All Personas)

1. **READ FIRST**: Always read at least 1500 lines to understand context fully
2. **DELETE MORE THAN YOU ADD**: Complexity compounds into disasters
3. **FOLLOW EXISTING PATTERNS**: Don't invent new approaches
4. **BUILD AND TEST**: Run your build and test commands after changes
5. **COMMIT FREQUENTLY**: Every 5-10 minutes for meaningful progress

## File Structure Reference

[CUSTOMIZE THIS SECTION FOR YOUR PROJECT]

```
./
├── package.json          # [or your dependency file]
├── src/                  # [your source directory]
│   ├── [your modules]
│   └── [your files]
├── test/                 # [your test directory]
├── .promptx/             # Agent personas (created by promptx init)
│   └── personas/
└── CLAUDE.md            # This file (after merging)
```

## Common Commands (All Personas)

[CUSTOMIZE THIS SECTION FOR YOUR PROJECT]

```bash
# Build project
[your build command]

# Run tests  
[your test command]

# Lint code
[your lint command]

# Deploy locally
[your deploy command]
```

## CRITICAL REMINDER

**You CANNOT proceed without adopting a persona.** Each persona has:
- Specific workflows and rules
- Required tools and commands  
- Success criteria and verification steps
- Commit and progress requirements

**Choose your persona now and follow its instructions exactly.**

---

*Generated by promptx - Agent personas are in .promptx/personas/*


================================================
FILE: LICENSE
================================================
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of control, an entity
      is assumed to be under the control of another entity if the entity
      that controls the other entity owns directly or indirectly fifty
      percent (50%) or more of the outstanding shares, or if there is some
      other contractual arrangement whereby the first entity effectively
      controls the management decisions of the other entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (which shall not include Communication that is clearly marked or
      otherwise designated in writing by the copyright owner as "Not a Contribution").

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based upon (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and separate works based upon the Work.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control
      systems, and issue tracking systems that are managed by, or on behalf
      of, the Licensor for the purpose of discussing and improving the Work,
      but excluding communication that is clearly marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to use, reproduce, modify, merge, publish,
      distribute, sublicense, and/or sell copies of the Work, and to
      permit persons to whom the Work is furnished to do so, subject to
      the following conditions:

      The above copyright notice and this permission notice shall be
      included in all copies or substantial portions of the Work.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, trademark, patent,
          attribution and other notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright notice to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Support. You can choose to offer,
      and charge a fee for, warranty, support, indemnity or other
      liability obligations and/or rights consistent with this License.
      However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or support.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in comments for
      the particular file format. (We recommend that you include a
      file named COPYING in your distribution, which contains the
      complete text of the license.)

      Copyright [yyyy] [name of copyright owner]

      Licensed under the Apache License, Version 2.0 (the "License");
      you may not use this file except in compliance with the License.
      You may obtain a copy of the License at

          http://www.apache.org/licenses/LICENSE-2.0

      Unless required by applicable law or agreed to in writing, software
      distributed under the License is distributed on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
      See the License for the specific language governing permissions and
      limitations under the License.

================================================
FILE: Makefile
================================================
# Makefile for launch compatibility
#
# Targets:
#   setup     - install the project's JavaScript dependencies
#   teardown  - remove the installed dependencies (node_modules)
#
# Both targets are declared phony: they name actions, not files, so make
# runs their recipes even if a file called "setup" or "teardown" exists.
.PHONY: setup teardown

# Install dependencies. The package managers are tried in order (npm, then
# bun, then yarn); each later one runs only if the previous command failed.
# The leading "@" keeps make from echoing the command line itself.
setup:
	@echo "Setting up project..."
	@npm install || bun install || yarn install
	@echo "Setup complete!"

# Delete the node_modules directory. "rm -rf" does not fail when the
# directory is already absent.
teardown:
	@echo "Tearing down project..."
	@rm -rf node_modules
	@echo "Teardown complete!"


================================================
FILE: README.md
================================================
# 12-Factor Agents - Principles for building reliable LLM applications

<div align="center">
<a href="https://www.apache.org/licenses/LICENSE-2.0">
        <img src="https://img.shields.io/badge/Code-Apache%202.0-blue.svg" alt="Code License: Apache 2.0"></a>
<a href="https://creativecommons.org/licenses/by-sa/4.0/">
        <img src="https://img.shields.io/badge/Content-CC%20BY--SA%204.0-lightgrey.svg" alt="Content License: CC BY-SA 4.0"></a>
<a href="https://humanlayer.dev/discord">
    <img src="https://img.shields.io/badge/chat-discord-5865F2" alt="Discord Server"></a>
<a href="https://www.youtube.com/watch?v=8kMaTybvDUw">
    <img src="https://img.shields.io/badge/aidotengineer-conf_talk_(17m)-white" alt="AI Engineer Conf Talk (17m)"></a>
<a href="https://www.youtube.com/watch?v=yxJDyQ8v6P0">
    <img src="https://img.shields.io/badge/youtube-deep_dive-crimson" alt="YouTube
Deep Dive"></a>
    
</div>

<p></p>

*In the spirit of [12 Factor Apps](https://12factor.net/)*.  *The source for this project is public at https://github.com/humanlayer/12-factor-agents, and I welcome your feedback and contributions. Let's figure this out together!*

> [!TIP]
> Missed the AI Engineer World's Fair? [Catch the talk here](https://www.youtube.com/watch?v=8kMaTybvDUw)
>
> Looking for Context Engineering? [Jump straight to factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
>
> Want to contribute to `npx/uvx create-12-factor-agent` - check out [the discussion thread](https://github.com/humanlayer/12-factor-agents/discussions/61)


<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=2acad99a-c2d9-48df-86f5-9ca8061b7bf9" />

<a href="#visual-nav"><img width="907" alt="Screenshot 2025-04-03 at 2 49 07 PM" src="https://github.com/user-attachments/assets/23286ad8-7bef-4902-b371-88ff6a22e998" /></a>


Hi, I'm Dex. I've been [hacking](https://youtu.be/8bIHcttkOTE) on [AI agents](https://theouterloop.substack.com) for [a while](https://humanlayer.dev). 


**I've tried every agent framework out there**, from the plug-and-play crew/langchains to the "minimalist" smolagents of the world to the "production grade" langgraph, griptape, etc.

**I've talked to a lot of really strong founders**, in and out of YC, who are all building really impressive things with AI. Most of them are rolling the stack themselves. I don't see a lot of frameworks in production customer-facing agents.

**I've been surprised to find** that most of the products out there billing themselves as "AI Agents" are not all that agentic. A lot of them are mostly deterministic code, with LLM steps sprinkled in at just the right points to make the experience truly magical.

Agents, at least the good ones, don't follow the ["here's your prompt, here's a bag of tools, loop until you hit the goal"](https://www.anthropic.com/engineering/building-effective-agents#agents) pattern. Rather, they are comprised of mostly just software. 

So, I set out to answer:

> ### **What are the principles we can use to build LLM-powered software that is actually good enough to put in the hands of production customers?**

Welcome to 12-factor agents. As every Chicago mayor since Daley has consistently plastered all over the city's major airports, we're glad you're here.

*Special thanks to [@iantbutler01](https://github.com/iantbutler01), [@tnm](https://github.com/tnm), [@hellovai](https://www.github.com/hellovai), [@stantonk](https://www.github.com/stantonk), [@balanceiskey](https://www.github.com/balanceiskey), [@AdjectiveAllison](https://www.github.com/AdjectiveAllison), [@pfbyjy](https://www.github.com/pfbyjy), [@a-churchill](https://www.github.com/a-churchill), and the SF MLOps community for early feedback on this guide.*

## The Short Version: The 12 Factors

Even if LLMs [continue to get exponentially more powerful](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md#what-if-llms-get-smarter), there will be core engineering techniques that make LLM-powered software more reliable, more scalable, and easier to maintain.

- [How We Got Here: A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md)
- [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)
- [Factor 2: Own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)
- [Factor 3: Own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
- [Factor 4: Tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)
- [Factor 5: Unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)
- [Factor 6: Launch/Pause/Resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md)
- [Factor 7: Contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)
- [Factor 8: Own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md)
- [Factor 9: Compact Errors into Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md)
- [Factor 10: Small, Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md)
- [Factor 11: Trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md)
- [Factor 12: Make your agent a stateless reducer](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md)

### Visual Nav

|    |    |    |
|----|----|-----|
|[![factor 1](https://github.com/humanlayer/12-factor-agents/blob/main/img/110-natural-language-tool-calls.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) | [![factor 2](https://github.com/humanlayer/12-factor-agents/blob/main/img/120-own-your-prompts.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) | [![factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/img/130-own-your-context-building.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) |
|[![factor 4](https://github.com/humanlayer/12-factor-agents/blob/main/img/140-tools-are-just-structured-outputs.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) | [![factor 5](https://github.com/humanlayer/12-factor-agents/blob/main/img/150-unify-state.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) | [![factor 6](https://github.com/humanlayer/12-factor-agents/blob/main/img/160-pause-resume-with-simple-apis.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) |
| [![factor 7](https://github.com/humanlayer/12-factor-agents/blob/main/img/170-contact-humans-with-tools.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) | [![factor 8](https://github.com/humanlayer/12-factor-agents/blob/main/img/180-control-flow.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) | [![factor 9](https://github.com/humanlayer/12-factor-agents/blob/main/img/190-factor-9-errors-static.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md) |
| [![factor 10](https://github.com/humanlayer/12-factor-agents/blob/main/img/1a0-small-focused-agents.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md) | [![factor 11](https://github.com/humanlayer/12-factor-agents/blob/main/img/1b0-trigger-from-anywhere.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) | [![factor 12](https://github.com/humanlayer/12-factor-agents/blob/main/img/1c0-stateless-reducer.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md) |

## How we got here

For a deeper dive on my agent journey and what led us here, check out [A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) - a quick summary here:

### Software as a directed graph

We're gonna talk a lot about Directed Graphs (DGs) and their Acyclic friends, DAGs. I'll start by pointing out that...well...software is a directed graph. There's a reason we used to represent programs as flow charts.

![010-software-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/010-software-dag.png)

### From code to DAGs

Around 20 years ago, we started to see DAG orchestrators become popular. We're talking classics like [Airflow](https://airflow.apache.org/), [Prefect](https://www.prefect.io/), some predecessors, and some newer ones like ([dagster](https://dagster.io/), [inngest](https://www.inngest.com/), [windmill](https://www.windmill.dev/)). These followed the same graph pattern, with the added benefit of observability, modularity, retries, administration, etc.

![015-dag-orchestrators](https://github.com/humanlayer/12-factor-agents/blob/main/img/015-dag-orchestrators.png)

### The promise of agents

I'm not the first [person to say this](https://youtu.be/Dc99-zTMyMg?si=bcT0hIwWij2mR-40&t=73), but my biggest takeaway when I started learning about agents, was that you get to throw the DAG away. Instead of software engineers coding each step and edge case, you can give the agent a goal and a set of transitions:

![025-agent-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/025-agent-dag.png)

And let the LLM make decisions in real time to figure out the path

![026-agent-dag-lines](https://github.com/humanlayer/12-factor-agents/blob/main/img/026-agent-dag-lines.png)

The promise here is that you write less software, you just give the LLM the "edges" of the graph and let it figure out the nodes. You can recover from errors, you can write less code, and you may find that LLMs find novel solutions to problems.


### Agents as loops

As we'll see later, it turns out this doesn't quite work.

Let's dive one step deeper - with agents you've got this loop consisting of 3 steps:

1. LLM determines the next step in the workflow, outputting structured json ("tool calling")
2. Deterministic code executes the tool call
3. The result is appended to the context window 
4. Repeat until the next step is determined to be "done"

```python
initial_event = {"message": "..."}
context = [initial_event]
while True:
  next_step = await llm.determine_next_step(context)
  context.append(next_step)

  if next_step.intent == "done":
    return next_step.final_answer

  result = await execute_step(next_step)
  context.append(result)
```

Our initial context is just the starting event (maybe a user message, maybe a cron fired, maybe a webhook, etc), and we ask the llm to choose the next step (tool) or to determine that we're done.

Here's a multi-step example:

[![027-agent-loop-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif)](https://github.com/user-attachments/assets/3beb0966-fdb1-4c12-a47f-ed4e8240f8fd)

<details>
<summary><a href="https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif">GIF Version</a></summary>

![027-agent-loop-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif)

</details>

## Why 12-factor agents?

At the end of the day, this approach just doesn't work as well as we want it to.

In building HumanLayer, I've talked to at least 100 SaaS builders (mostly technical founders) looking to make their existing product more agentic. The journey usually goes something like:

1. Decide you want to build an agent
2. Product design, UX mapping, what problems to solve
3. Want to move fast, so grab $FRAMEWORK and *get to building*
4. Get to 70-80% quality bar 
5. Realize that 80% isn't good enough for most customer-facing features
6. Realize that getting past 80% requires reverse-engineering the framework, prompts, flow, etc.
7. Start over from scratch

<details>
<summary>Random Disclaimers</summary>

**DISCLAIMER**: I'm not sure the exact right place to say this, but here seems as good as any: **this is BY NO MEANS meant to be a dig on either the many frameworks out there, or the pretty dang smart people who work on them**. They enable incredible things and have accelerated the AI ecosystem.

I hope that one outcome of this post is that agent framework builders can learn from the journeys of myself and others, and make frameworks even better. 

Especially for builders who want to move fast but need deep control.

**DISCLAIMER 2**: I'm not going to talk about MCP. I'm sure you can see where it fits in.

**DISCLAIMER 3**: I'm using mostly typescript, for [reasons](https://www.linkedin.com/posts/dexterihorthy_llms-typescript-aiagents-activity-7290858296679313408-Lh9e?utm_source=share&utm_medium=member_desktop&rcm=ACoAAA4oHTkByAiD-wZjnGsMBUL_JT6nyyhOh30) but all this stuff works in python or any other language you prefer. 


Anyways back to the thing...

</details>

### Design Patterns for great LLM applications

After digging through hundreds of AI libraries and working with dozens of founders, my instinct is this:

1. There are some core things that make agents great
2. Going all in on a framework and building what is essentially a greenfield rewrite may be counter-productive
3. There are some core principles that make agents great, and you will get most/all of them if you pull in a framework
4. BUT, the fastest way I've seen for builders to get high-quality AI software in the hands of customers is to take small, modular concepts from agent building, and incorporate them into their existing product
5. These modular concepts from agents can be defined and applied by most skilled software engineers, even if they don't have an AI background

> #### The fastest way I've seen for builders to get good AI software in the hands of customers is to take small, modular concepts from agent building, and incorporate them into their existing product


## The 12 Factors (again)


- [How We Got Here: A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md)
- [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)
- [Factor 2: Own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)
- [Factor 3: Own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
- [Factor 4: Tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)
- [Factor 5: Unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)
- [Factor 6: Launch/Pause/Resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md)
- [Factor 7: Contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)
- [Factor 8: Own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md)
- [Factor 9: Compact Errors into Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md)
- [Factor 10: Small, Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md)
- [Factor 11: Trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md)
- [Factor 12: Make your agent a stateless reducer](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md)

## Honorable Mentions / other advice

- [Factor 13: Pre-fetch all the context you might need](https://github.com/humanlayer/12-factor-agents/blob/main/content/appendix-13-pre-fetch.md)

## Related Resources

- Contribute to this guide [here](https://github.com/humanlayer/12-factor-agents)
- [I talked about a lot of this on an episode of the Tool Use podcast](https://youtu.be/8bIHcttkOTE) in March 2025
- I write about some of this stuff at [The Outer Loop](https://theouterloop.substack.com)
- I do [webinars about Maximizing LLM Performance](https://github.com/hellovai/ai-that-works/tree/main) with [@hellovai](https://github.com/hellovai)
- We build OSS agents with this methodology under [got-agents/agents](https://github.com/got-agents/agents)
- We ignored all our own advice and built a [framework for running distributed agents in kubernetes](https://github.com/humanlayer/kubechain)
- Other links from this guide:
  - [12 Factor Apps](https://12factor.net)
  - [Building Effective Agents (Anthropic)](https://www.anthropic.com/engineering/building-effective-agents#agents)
  - [Prompts are Functions](https://thedataexchange.media/baml-revolution-in-ai-engineering/ )
  - [Library patterns: Why frameworks are evil](https://tomasp.net/blog/2015/library-frameworks/)
  - [The Wrong Abstraction](https://sandimetz.com/blog/2016/1/20/the-wrong-abstraction)
  - [Mailcrew Agent](https://github.com/dexhorthy/mailcrew)
  - [Mailcrew Demo Video](https://www.youtube.com/watch?v=f_cKnoPC_Oo)
  - [Chainlit Demo](https://x.com/chainlit_io/status/1858613325921480922)
  - [TypeScript for LLMs](https://www.linkedin.com/posts/dexterihorthy_llms-typescript-aiagents-activity-7290858296679313408-Lh9e)
  - [Schema Aligned Parsing](https://www.boundaryml.com/blog/schema-aligned-parsing)
  - [Function Calling vs Structured Outputs vs JSON Mode](https://www.vellum.ai/blog/when-should-i-use-function-calling-structured-outputs-or-json-mode)
  - [BAML on GitHub](https://github.com/boundaryml/baml)
  - [OpenAI JSON vs Function Calling](https://docs.llamaindex.ai/en/stable/examples/llm/openai_json_vs_function_calling/)
  - [Outer Loop Agents](https://theouterloop.substack.com/p/openais-realtime-api-is-a-step-towards)
  - [Airflow](https://airflow.apache.org/)
  - [Prefect](https://www.prefect.io/)
  - [Dagster](https://dagster.io/)
  - [Inngest](https://www.inngest.com/)
  - [Windmill](https://www.windmill.dev/)
  - [The AI Agent Index (MIT)](https://aiagentindex.mit.edu/)
  - [NotebookLM on Finding Model Capability Boundaries](https://open.substack.com/pub/swyx/p/notebooklm?selection=08e1187c-cfee-4c63-93c9-71216640a5f8)

## Contributors

Thanks to everyone who has contributed to 12-factor agents!

[<img src="https://avatars.githubusercontent.com/u/3730605?v=4&s=80" width="80px" alt="dexhorthy" />](https://github.com/dexhorthy) [<img src="https://avatars.githubusercontent.com/u/50557586?v=4&s=80" width="80px" alt="Sypherd" />](https://github.com/Sypherd) [<img src="https://avatars.githubusercontent.com/u/66259401?v=4&s=80" width="80px" alt="tofaramususa" />](https://github.com/tofaramususa) [<img src="https://avatars.githubusercontent.com/u/18105223?v=4&s=80" width="80px" alt="a-churchill" />](https://github.com/a-churchill) [<img src="https://avatars.githubusercontent.com/u/4084885?v=4&s=80" width="80px" alt="Elijas" />](https://github.com/Elijas) [<img src="https://avatars.githubusercontent.com/u/39267118?v=4&s=80" width="80px" alt="hugolmn" />](https://github.com/hugolmn) [<img src="https://avatars.githubusercontent.com/u/1882972?v=4&s=80" width="80px" alt="jeremypeters" />](https://github.com/jeremypeters)

[<img src="https://avatars.githubusercontent.com/u/380402?v=4&s=80" width="80px" alt="kndl" />](https://github.com/kndl) [<img src="https://avatars.githubusercontent.com/u/16674643?v=4&s=80" width="80px" alt="maciejkos" />](https://github.com/maciejkos) [<img src="https://avatars.githubusercontent.com/u/85041180?v=4&s=80" width="80px" alt="pfbyjy" />](https://github.com/pfbyjy) [<img src="https://avatars.githubusercontent.com/u/36044389?v=4&s=80" width="80px" alt="0xRaduan" />](https://github.com/0xRaduan) [<img src="https://avatars.githubusercontent.com/u/7169731?v=4&s=80" width="80px" alt="zyuanlim" />](https://github.com/zyuanlim) [<img src="https://avatars.githubusercontent.com/u/15862501?v=4&s=80" width="80px" alt="lombardo-chcg" />](https://github.com/lombardo-chcg) [<img src="https://avatars.githubusercontent.com/u/160066852?v=4&s=80" width="80px" alt="sahanatvessel" />](https://github.com/sahanatvessel)
 
## License

All content and images are licensed under a <a href="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0 License</a>

Code is licensed under the <a href="https://www.apache.org/licenses/LICENSE-2.0">Apache 2.0 License</a>


================================================
FILE: content/appendix-13-pre-fetch.md
================================================
### Factor 13 - pre-fetch all the context you might need

If there's a high chance that your model will call tool X, don't waste token round trips telling the model to fetch it. That is, instead of a pseudo-prompt like:

```jinja
When looking at deployments, you will likely want to fetch the list of published git tags,
so you can use it to deploy to prod.

Here's what happened so far:

{{ thread.events }}

What's the next step?

Answer in JSON format with one of the following intents:

{
  intent: 'deploy_backend_to_prod',
  tag: string
} OR {
  intent: 'list_git_tags'
} OR {
  intent: 'done_for_now',
  message: string
}
```

and your code looks like

```python
thread = {"events": [initial_message]}
next_step = await determine_next_step(thread)

while True:
  switch next_step.intent:
    case 'list_git_tags':
      tags = await fetch_git_tags()
      thread["events"].append({
        type: 'list_git_tags',
        data: tags,
      })
    case 'deploy_backend_to_prod':
      deploy_result = await deploy_backend_to_prod(next_step.data.tag)
      thread["events"].append({
        "type": 'deploy_backend_to_prod',
        "data": deploy_result,
      })
    case 'done_for_now':
      await notify_human(next_step.message)
      break
    # ...
```

You might as well just fetch the tags and include them in the context window, like:

```diff
- When looking at deployments, you will likely want to fetch the list of published git tags,
- so you can use it to deploy to prod.

+ The current git tags are:

+ {{ git_tags }}


Here's what happened so far:

{{ thread.events }}

What's the next step?

Answer in JSON format with one of the following intents:

{
  intent: 'deploy_backend_to_prod',
  tag: string
- } OR {
-   intent: 'list_git_tags'
} OR {
  intent: 'done_for_now',
  message: string
}

```

and your code looks like

```diff
thread = {"events": [initial_message]}
+ git_tags = await fetch_git_tags()

- next_step = await determine_next_step(thread)
+ next_step = await determine_next_step(thread, git_tags)

while True:
  switch next_step.intent:
-    case 'list_git_tags':
-      tags = await fetch_git_tags()
-      thread["events"].append({
-        type: 'list_git_tags',
-        data: tags,
-      })
    case 'deploy_backend_to_prod':
      deploy_result = await deploy_backend_to_prod(next_step.data.tag)
      thread["events"].append({
        "type": 'deploy_backend_to_prod',
        "data": deploy_result,
      })
    case 'done_for_now':
      await notify_human(next_step.message)
      break
    # ...
```

or even just include the tags in the thread and remove the specific parameter from your prompt template:

```diff
thread = {"events": [initial_message]}
+ # add the request
+ thread["events"].append({
+  "type": 'list_git_tags',
+ })

git_tags = await fetch_git_tags()

+ # add the result
+ thread["events"].append({
+  "type": 'list_git_tags_result',
+  "data": git_tags,
+ })

- next_step = await determine_next_step(thread, git_tags)
+ next_step = await determine_next_step(thread)

while True:
  switch next_step.intent:
    case 'deploy_backend_to_prod':
      deploy_result = await deploy_backend_to_prod(next_step.data.tag)
      thread["events"].append(deploy_result)
    case 'done_for_now':
      await notify_human(next_step.message)
      break
    # ...
```

Overall:

> #### If you already know what tools you'll want the model to call, just call them DETERMINISTICALLY and let the model do the hard part of figuring out how to use their outputs

Again, AI engineering is all about [Context Engineering](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md).

[← Stateless Reducer](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md) | [Further Reading →](https://github.com/humanlayer/12-factor-agents/blob/main/README.md#related-resources)


================================================
FILE: content/brief-history-of-software.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

## The longer version: how we got here

### You don't have to listen to me

Whether you're new to agents or an ornery old veteran like me, I'm going to try to convince you to throw out most of what you think about AI Agents, take a step back, and rethink them from first principles. (spoiler alert if you didn't catch the OpenAI responses launch a few weeks back, but pushing MORE agent logic behind an API ain't it)


## Agents are software, and a brief history thereof

Let's talk about how we got here.

### 60 years ago

We're gonna talk a lot about Directed Graphs (DGs) and their Acyclic friends, DAGs. I'll start by pointing out that...well...software is a directed graph. There's a reason we used to represent programs as flow charts.

![010-software-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/010-software-dag.png)

### 20 years ago

Around 20 years ago, we started to see DAG orchestrators become popular. We're talking classics like [Airflow](https://airflow.apache.org/), [Prefect](https://www.prefect.io/), some predecessors, and some newer ones like ([dagster](https://dagster.io/), [inngest](https://www.inngest.com/), [windmill](https://www.windmill.dev/)). These followed the same graph pattern, with the added benefit of observability, modularity, retries, administration, etc.

![015-dag-orchestrators](https://github.com/humanlayer/12-factor-agents/blob/main/img/015-dag-orchestrators.png)

### 10-15 years ago

When ML models started to get good enough to be useful, we started to see DAGs with ML models sprinkled in. You might imagine steps like "summarize the text in this column into a new column" or "classify the support issues by severity or sentiment".

![020-dags-with-ml](https://github.com/humanlayer/12-factor-agents/blob/main/img/020-dags-with-ml.png)

But at the end of the day, it's still mostly the same good old deterministic software.

### The promise of agents

I'm not the first [person to say this](https://youtu.be/Dc99-zTMyMg?si=bcT0hIwWij2mR-40&t=73), but my biggest takeaway when I started learning about agents, was that you get to throw the DAG away. Instead of software engineers coding each step and edge case, you can give the agent a goal and a set of transitions:

![025-agent-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/025-agent-dag.png)

And let the LLM make decisions in real time to figure out the path

![026-agent-dag-lines](https://github.com/humanlayer/12-factor-agents/blob/main/img/026-agent-dag-lines.png)

The promise here is that you write less software, you just give the LLM the "edges" of the graph and let it figure out the nodes. You can recover from errors, you can write less code, and you may find that LLMs find novel solutions to problems.

### Agents as loops

Put another way, you've got this loop consisting of 3 steps:

1. LLM determines the next step in the workflow, outputting structured json ("tool calling")
2. Deterministic code executes the tool call
3. The result is appended to the context window 
4. Repeat until the next step is determined to be "done"

```python
initial_event = {"message": "..."}
context = [initial_event]
while True:
  next_step = await llm.determine_next_step(context)
  context.append(next_step)

  if next_step.intent == "done":
    return next_step.final_answer

  result = await execute_step(next_step)
  context.append(result)
```

Our initial context is just the starting event (maybe a user message, maybe a cron fired, maybe a webhook, etc),
and we ask the llm to choose the next step (tool) or to determine that we're done.

Here's a multi-step example:

[![027-agent-loop-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif)](https://github.com/user-attachments/assets/3beb0966-fdb1-4c12-a47f-ed4e8240f8fd)

<details>
<summary><a href="https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif">GIF Version</a></summary>

![027-agent-loop-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif)

</details>

And the "materialized" DAG that was generated would look something like:

![027-agent-loop-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-dag.png)

### The problem with this "loop until you solve it" pattern

The biggest problems with this pattern:

- Agents get lost when the context window gets too long - they spin out trying the same broken approach over and over again
- Literally that's it, but that's enough to kneecap the approach

Even if you haven't hand-rolled an agent, you've probably seen this long-context problem in working with agentic coding tools. They just get lost after a while and you need to start a new chat.

I'll even perhaps posit something I've heard in passing quite a bit, and that YOU probably have developed your own intuition around:

> ### **Even as models support longer and longer context windows, you'll ALWAYS get better results with a small, focused prompt and context**

Most builders I've talked to **pushed the "tool calling loop" idea to the side** when they realized that anything more than 10-20 turns becomes a big mess that the LLM can't recover from. Even if the agent gets it right 90% of the time, that's miles away from "good enough to put in customer hands". Can you imagine a web app that crashed on 10% of page loads?

**Update 2025-06-09** - I really like how [@swyx](https://x.com/swyx/status/1932125643384455237) put this:

<a href="https://x.com/swyx/status/1932125643384455237"><img width="593" alt="Screenshot 2025-07-02 at 11 50 50 AM" src="https://github.com/user-attachments/assets/c7d94042-e4b9-4b87-87fd-55c7ff94bb3b" /></a>

### What actually works - micro agents

One thing that I **have** seen in the wild quite a bit is taking the agent pattern and sprinkling it into a broader more deterministic DAG. 

![micro-agent-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/028-micro-agent-dag.png)

You might be asking - "why use agents at all in this case?" - we'll get into that shortly, but basically, having language models managing well-scoped sets of tasks makes it easy to incorporate live human feedback, translating it into workflow steps without spinning out into context error loops. ([factor 1](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md), [factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md), [factor 7](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)).

> #### having language models managing well-scoped sets of tasks makes it easy to incorporate live human feedback...without spinning out into context error loops

### A real life micro agent 

Here's an example of how deterministic code might run one micro agent responsible for handling the human-in-the-loop steps for deployment. 

![029-deploybot-high-level](https://github.com/humanlayer/12-factor-agents/blob/main/img/029-deploybot-high-level.png)

* **Human** Merges PR to GitHub main branch
* **Deterministic Code** Deploys to staging env
* **Deterministic Code** Runs end-to-end (e2e) tests against staging
* **Deterministic Code** Hands to agent for prod deployment, with initial context: "deploy SHA 4af9ec0 to production"
* **Agent** calls `deploy_frontend_to_prod(4af9ec0)`
* **Deterministic code** requests human approval on this action
* **Human** Rejects the action with feedback "can you deploy the backend first?"
* **Agent** calls `deploy_backend_to_prod(4af9ec0)`
* **Deterministic code** requests human approval on this action
* **Human** approves the action
* **Deterministic code** executes the backend deployment
* **Agent** calls `deploy_frontend_to_prod(4af9ec0)`
* **Deterministic code** requests human approval on this action
* **Human** approves the action
* **Deterministic code** executes the frontend deployment
* **Agent** determines that the task was completed successfully, we're done!
* **Deterministic code** runs the end-to-end tests against production
* **Deterministic code** task completed, OR pass to rollback agent to review failures and potentially roll back

[![033-deploybot-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/033-deploybot.gif)](https://github.com/user-attachments/assets/deb356e9-0198-45c2-9767-231cb569ae13)

<details>
<summary><a href="https://github.com/humanlayer/12-factor-agents/blob/main/img/033-deploybot.gif">GIF Version</a></summary>

![033-deploybot-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/033-deploybot.gif)

</details>

This example is based on a real life [OSS agent we've shipped to manage our deployments at HumanLayer](https://github.com/got-agents/agents/tree/main/deploybot-ts) - here is a real conversation I had with it last week:

![035-deploybot-conversation](https://github.com/humanlayer/12-factor-agents/blob/main/img/035-deploybot-conversation.png)


We haven't given this agent a huge pile of tools or tasks. The primary value in the LLM is parsing the human's plaintext feedback and proposing an updated course of action. We isolate tasks and contexts as much as possible to keep the LLM focused on a small, 5-10 step workflow.

Here's another [more classic support / chatbot demo](https://x.com/chainlit_io/status/1858613325921480922).

### So what's an agent really?

- **prompt** - tell an LLM how to behave, and what "tools" it has available. The output of the prompt is a JSON object that describes the next step in the workflow (the "tool call" or "function call"). ([factor 2](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md))
- **switch statement** - based on the JSON that the LLM returns, decide what to do with it. (part of [factor 8](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md))
- **accumulated context** - store the list of steps that have happened and their results ([factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md))
- **for loop** - until the LLM emits some sort of "Terminal" tool call (or plaintext response), add the result of the switch statement to the context window and ask the LLM to choose the next step. ([factor 8](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md))

![040-4-components](https://github.com/humanlayer/12-factor-agents/blob/main/img/040-4-components.png)

In the "deploybot" example, we gain a couple benefits from owning the control flow and context accumulation:

- In our **switch statement** and **for loop**, we can hijack control flow to pause for human input or to wait for completion of long-running tasks
- We can trivially serialize the **context** window for pause+resume
- In our **prompt**, we can optimize the heck out of how we pass instructions and "what happened so far" to the LLM


[Part II](https://github.com/humanlayer/12-factor-agents/blob/main/README.md#12-factor-agents) will **formalize these patterns** so they can be applied to add impressive AI features to any software project, without needing to go all in on conventional implementations/definitions of "AI agent".


[Factor 1 - Natural Language to Tool Calls →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)


================================================
FILE: content/factor-01-natural-language-to-tool-calls.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 1. Natural Language to Tool Calls 

One of the most common patterns in agent building is to convert natural language to structured tool calls. This is a powerful pattern that allows you to build agents that can reason about tasks and execute them.

![110-natural-language-tool-calls](https://github.com/humanlayer/12-factor-agents/blob/main/img/110-natural-language-tool-calls.png)

This pattern, when applied atomically, is the simple translation of a phrase like

> can you create a payment link for $750 to Terri for sponsoring the february AI tinkerers meetup? 

to a structured object that describes a Stripe API call like

```json
{
  "function": {
    "name": "create_payment_link",
    "parameters": {
      "amount": 750,
      "customer": "cust_128934ddasf9",
      "product": "prod_8675309",
      "price": "prc_09874329fds",
      "quantity": 1,
      "memo": "Hey Jeff - see below for the payment link for the february ai tinkerers meetup"
    }
  }
}
```

**Note**: in reality the Stripe API is a bit more complex, a [real agent that does this](https://github.com/dexhorthy/mailcrew) ([video](https://www.youtube.com/watch?v=f_cKnoPC_Oo)) would list customers, list products, list prices, etc. to build this payload with the proper ids, or include those ids in the prompt/context window (we'll see below how those are kinda the same thing though!)

From there, deterministic code can pick up the payload and do something with it. (More on this in [factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md))

```python
# The LLM takes natural language and returns a structured object
nextStep = await llm.determineNextStep(
  """
  create a payment link for $750 to Jeff 
  for sponsoring the february AI tinkerers meetup
  """
  )

# Handle the structured output based on its function
if nextStep.function == 'create_payment_link':
    stripe.paymentlinks.create(nextStep.parameters)
    return  # or whatever you want, see below
elif nextStep.function == 'something_else':
    # ... more cases
    pass
else:  # the model didn't call a tool we know about
    # do something else
    pass
```

**NOTE**: While a full agent would then receive the API call result and loop with it, eventually returning something like

> I've successfully created a payment link for $750 to Terri for sponsoring the february AI tinkerers meetup. Here's the link: https://buy.stripe.com/test_1234567890

**Instead**, we're actually going to skip that step here, and save it for another factor, which you may or may not want to also incorporate (up to you!)

[← How We Got Here](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) | [Own Your Prompts →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)


================================================
FILE: content/factor-02-own-your-prompts.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 2. Own your prompts

Don't outsource your prompt engineering to a framework. 

![120-own-your-prompts](https://github.com/humanlayer/12-factor-agents/blob/main/img/120-own-your-prompts.png)

By the way, [this is far from novel advice:](https://hamel.dev/blog/posts/prompt/)

![image](https://github.com/user-attachments/assets/575bab37-0f96-49fb-9ce3-9a883cdd420b)

Some frameworks provide a "black box" approach like this:

```python
agent = Agent(
  role="...",
  goal="...",
  personality="...",
  tools=[tool1, tool2, tool3]
)

task = Task(
  instructions="...",
  expected_output=OutputModel
)

result = agent.run(task)
```

This is great for pulling in some TOP NOTCH prompt engineering to get you started, but it is often difficult to tune and/or reverse engineer to get exactly the right tokens into your model.

Instead, own your prompts and treat them as first-class code:

```rust
function DetermineNextStep(thread: string) -> DoneForNow | ListGitTags | DeployBackend | DeployFrontend | RequestMoreInformation {
  prompt #"
    {{ _.role("system") }}
    
    You are a helpful assistant that manages deployments for frontend and backend systems.
    You work diligently to ensure safe and successful deployments by following best practices
    and proper deployment procedures.
    
    Before deploying any system, you should check:
    - The deployment environment (staging vs production)
    - The correct tag/version to deploy
    - The current system status
    
    You can use tools like deploy_backend, deploy_frontend, and check_deployment_status
    to manage deployments. For sensitive deployments, use request_approval to get
    human verification.
    
    Always think about what to do first, like:
    - Check current deployment status
    - Verify the deployment tag exists
    - Request approval if needed
    - Deploy to staging before production
    - Monitor deployment progress
    
    {{ _.role("user") }}

    {{ thread }}
    
    What should the next step be?
  "#
}
```

(the above example uses [BAML](https://github.com/boundaryml/baml) to generate the prompt, but you can do this with any prompt engineering tool you want, or even just template it manually)

If the signature looks a little funny, we'll get to that in [factor 4 - tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)

```typescript
function DetermineNextStep(thread: string) -> DoneForNow | ListGitTags | DeployBackend | DeployFrontend | RequestMoreInformation {
```

Key benefits of owning your prompts:

1. **Full Control**: Write exactly the instructions your agent needs, no black box abstractions
2. **Testing and Evals**: Build tests and evals for your prompts just like you would for any other code
3. **Iteration**: Quickly modify prompts based on real-world performance
4. **Transparency**: Know exactly what instructions your agent is working with
5. **Role Hacking**: take advantage of APIs that support nonstandard usage of user/assistant roles - for example, the now-deprecated non-chat flavor of OpenAI "completions" API. This includes some so-called "model gaslighting" techniques

Remember: Your prompts are the primary interface between your application logic and the LLM.

Having full control over your prompts gives you the flexibility and prompt control you need for production-grade agents.

I don't know what's the best prompt, but I know you want the flexibility to be able to try EVERYTHING.

[← Natural Language To Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) | [Own Your Context Window →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)


================================================
FILE: content/factor-03-own-your-context-window.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 3. Own your context window

You don't necessarily need to use standard message-based formats for conveying context to an LLM.

> #### At any given point, your input to an LLM in an agent is "here's what's happened so far, what's the next step"

<!-- todo syntax highlighting -->
<!-- ![130-own-your-context-building](https://github.com/humanlayer/12-factor-agents/blob/main/img/130-own-your-context-building.png) -->

Everything is context engineering. [LLMs are stateless functions](https://thedataexchange.media/baml-revolution-in-ai-engineering/) that turn inputs into outputs. To get the best outputs, you need to give them the best inputs.

Creating great context means:

- The prompt and instructions you give to the model
- Any documents or external data you retrieve (e.g. RAG)
- Any past state, tool calls, results, or other history 
- Any past messages or events from related but separate histories/conversations (Memory)
- Instructions about what sorts of structured data to output

![image](https://github.com/user-attachments/assets/0f1f193f-8e94-4044-a276-576bd7764fd0)


### on context engineering

This guide is all about getting as much as possible out of today's models. Notably not mentioned are:

- Changes to model parameters like temperature, top_p, frequency_penalty, presence_penalty, etc.
- Training your own completion or embedding models
- Fine-tuning existing models

Again, I don't know what's the best way to hand context to an LLM, but I know you want the flexibility to be able to try EVERYTHING.

#### Standard vs Custom Context Formats

Most LLM clients use a standard message-based format like this:

```yaml
[
  {
    "role": "system",
    "content": "You are a helpful assistant..."
  },
  {
    "role": "user",
    "content": "Can you deploy the backend?"
  },
  {
    "role": "assistant",
    "content": null,
    "tool_calls": [
      {
        "id": "1",
        "name": "list_git_tags",
        "arguments": "{}"
      }
    ]
  },
  {
    "role": "tool",
    "name": "list_git_tags",
    "content": "{\"tags\": [{\"name\": \"v1.2.3\", \"commit\": \"abc123\", \"date\": \"2024-03-15T10:00:00Z\"}, {\"name\": \"v1.2.2\", \"commit\": \"def456\", \"date\": \"2024-03-14T15:30:00Z\"}, {\"name\": \"v1.2.1\", \"commit\": \"abe033d\", \"date\": \"2024-03-13T09:15:00Z\"}]}",
    "tool_call_id": "1"
  }
]
```

While this works great for most use cases, if you want to really get THE MOST out of today's LLMs, you need to get your context into the LLM in the most token- and attention-efficient way you can.

As an alternative to the standard message-based format, you can build your own context format that's optimized for your use case. For example, you can use custom objects and pack/spread them into one or more user, system, assistant, or tool messages as makes sense.

Here's an example of putting the whole context window into a single user message:
```yaml

[
  {
    "role": "system",
    "content": "You are a helpful assistant..."
  },
  {
    "role": "user",
    "content": |
            Here's everything that happened so far:
        
        <slack_message>
            From: @alex
            Channel: #deployments
            Text: Can you deploy the backend?
        </slack_message>
        
        <list_git_tags>
            intent: "list_git_tags"
        </list_git_tags>
        
        <list_git_tags_result>
            tags:
              - name: "v1.2.3"
                commit: "abc123"
                date: "2024-03-15T10:00:00Z"
              - name: "v1.2.2"
                commit: "def456"
                date: "2024-03-14T15:30:00Z"
              - name: "v1.2.1"
                commit: "ghi789"
                date: "2024-03-13T09:15:00Z"
        </list_git_tags_result>
        
        what's the next step?
    }
]
```

The model may infer that you're asking it `what's the next step` by the tool schemas you supply, but it never hurts to roll it into your prompt template.

### code example

We can build this with something like: 

```python

class Thread:
  events: List[Event]

class Event:
  # could just use string, or could be explicit - up to you
  type: Literal["list_git_tags", "deploy_backend", "deploy_frontend", "request_more_information", "done_for_now", "list_git_tags_result", "deploy_backend_result", "deploy_frontend_result", "request_more_information_result", "done_for_now_result", "error"]
  data: ListGitTags | DeployBackend | DeployFrontend | RequestMoreInformation |  
        ListGitTagsResult | DeployBackendResult | DeployFrontendResult | RequestMoreInformationResult | string

def event_to_prompt(event: Event) -> str:
    data = event.data if isinstance(event.data, str) \
           else stringifyToYaml(event.data)

    return f"<{event.type}>\n{data}\n</{event.type}>"


def thread_to_prompt(thread: Thread) -> str:
  return '\n\n'.join(event_to_prompt(event) for event in thread.events)
```

#### Example Context Windows

Here's how context windows might look with this approach:

**Initial Slack Request:**
```xml
<slack_message>
    From: @alex
    Channel: #deployments
    Text: Can you deploy the latest backend to production?
</slack_message>
```

**After Listing Git Tags:**
```xml
<slack_message>
    From: @alex
    Channel: #deployments
    Text: Can you deploy the latest backend to production?
    Thread: []
</slack_message>

<list_git_tags>
    intent: "list_git_tags"
</list_git_tags>

<list_git_tags_result>
    tags:
      - name: "v1.2.3"
        commit: "abc123"
        date: "2024-03-15T10:00:00Z"
      - name: "v1.2.2"
        commit: "def456"
        date: "2024-03-14T15:30:00Z"
      - name: "v1.2.1"
        commit: "ghi789"
        date: "2024-03-13T09:15:00Z"
</list_git_tags_result>
```

**After Error and Recovery:**
```xml
<slack_message>
    From: @alex
    Channel: #deployments
    Text: Can you deploy the latest backend to production?
    Thread: []
</slack_message>

<deploy_backend>
    intent: "deploy_backend"
    tag: "v1.2.3"
    environment: "production"
</deploy_backend>

<error>
    error running deploy_backend: Failed to connect to deployment service
</error>

<request_more_information>
    intent: "request_more_information_from_human"
    question: "I had trouble connecting to the deployment service, can you provide more details and/or check on the status of the service?"
</request_more_information>

<human_response>
    data:
      response: "I'm not sure what's going on, can you check on the status of the latest workflow?"
</human_response>
```

From here your next step might be: 

```python
nextStep = await determine_next_step(thread_to_prompt(thread))
```

```python
{
  "intent": "get_workflow_status",
  "workflow_name": "tag_push_prod.yaml",
}
```

The XML-style format is just one example - the point is you can build your own format that makes sense for your application. You'll get better quality if you have the flexibility to experiment with different context structures and what you store vs. what you pass to the LLM. 

Key benefits of owning your context window:

1. **Information Density**: Structure information in ways that maximize the LLM's understanding
2. **Error Handling**: Include error information in a format that helps the LLM recover. Consider hiding errors and failed calls from context window once they are resolved.
3. **Safety**: Control what information gets passed to the LLM, filtering out sensitive data
4. **Flexibility**: Adapt the format as you learn what works best for your use case
5. **Token Efficiency**: Optimize context format for token efficiency and LLM understanding

Context includes: prompts, instructions, RAG documents, history, tool calls, memory


Remember: The context window is your primary interface with the LLM. Taking control of how you structure and present information can dramatically improve your agent's performance.

Example - information density - same message, fewer tokens:

![Loom Screenshot 2025-04-22 at 09 00 56](https://github.com/user-attachments/assets/5cf041c6-72da-4943-be8a-99c73162b12a)


### Don't take it from me

About 2 months after 12-factor agents was published, context engineering started to become a pretty popular term.

<a href="https://x.com/karpathy/status/1937902205765607626"><img width="378" alt="Screenshot 2025-06-25 at 4 11 45 PM" src="https://github.com/user-attachments/assets/97e6e667-c35f-4855-8233-af40f05d6bce" /></a> <a href="https://x.com/tobi/status/1935533422589399127"><img width="378" alt="Screenshot 2025-06-25 at 4 12 59 PM" src="https://github.com/user-attachments/assets/7e6f5738-0d38-4910-82d1-7f5785b82b99" /></a>

There's also a quite good [Context Engineering Cheat Sheet](https://x.com/lenadroid/status/1943685060785524824) from [@lenadroid](https://x.com/lenadroid) from July 2025.

<a href="https://x.com/lenadroid/status/1943685060785524824"><img width="256" alt="image" src="https://github.com/user-attachments/assets/cac88aa3-8faf-440b-9736-cab95a9de477" /></a>


Recurring theme here: I don't know what's the best approach, but I know you want the flexibility to be able to try EVERYTHING.


[← Own Your Prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) | [Tools Are Structured Outputs →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)


================================================
FILE: content/factor-04-tools-are-structured-outputs.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 4. Tools are just structured outputs

Tools don't need to be complex. At their core, they're just structured output from your LLM that triggers deterministic code.

![140-tools-are-just-structured-outputs](https://github.com/humanlayer/12-factor-agents/blob/main/img/140-tools-are-just-structured-outputs.png)

For example, let's say you have two tools `CreateIssue` and `SearchIssues`. To ask an LLM to "use one of several tools" is just to ask it to output JSON we can parse into an object representing those tools.

```python

class Issue:
  title: str
  description: str
  team_id: str
  assignee_id: str

class CreateIssue:
  intent: "create_issue"
  issue: Issue

class SearchIssues:
  intent: "search_issues"
  query: str
  what_youre_looking_for: str
```

The pattern is simple:
1. LLM outputs structured JSON
2. Deterministic code executes the appropriate action (like calling an external API)
3. Results are captured and fed back into the context

This creates a clean separation between the LLM's decision-making and your application's actions. The LLM decides what to do, but your code controls how it's done. Just because an LLM "called a tool" doesn't mean you have to go execute a specific corresponding function in the same way every time.

If you recall our switch statement from above

```python
if nextStep.intent == 'create_payment_link':
    stripe.paymentlinks.create(nextStep.parameters)
    return # or whatever you want, see below
elif nextStep.intent == 'wait_for_a_while': 
    # do something monadic idk
else: #... the model didn't call a tool we know about
    # do something else
```

**Note**: there has been a lot said about the benefits of "plain prompting" vs. "tool calling" vs. "JSON mode" and the performance tradeoffs of each. We'll link some resources to that stuff soon, but not gonna get into it here. See [Prompting vs JSON Mode vs Function Calling vs Constrained Generation vs SAP](https://www.boundaryml.com/blog/schema-aligned-parsing), [When should I use function calling, structured outputs, or JSON mode?](https://www.vellum.ai/blog/when-should-i-use-function-calling-structured-outputs-or-json-mode#:~:text=We%20don%27t%20recommend%20using%20JSON,always%20use%20Structured%20Outputs%20instead) and [OpenAI JSON vs Function Calling](https://docs.llamaindex.ai/en/stable/examples/llm/openai_json_vs_function_calling/).

The "next step" might not be as atomic as just "run a pure function and return the result". You unlock a lot of flexibility when you think of "tool calls" as just a model outputting JSON describing what deterministic code should do. Put this together with [factor 8 own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md).

[← Own Your Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) | [Unify Execution State →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)


================================================
FILE: content/factor-05-unify-execution-state.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 5. Unify execution state and business state

Even outside the AI world, many infrastructure systems try to separate "execution state" from "business state". For AI apps, this might involve complex abstractions to track things like current step, next step, waiting status, retry counts, etc. This separation creates complexity that may be worthwhile, but may be overkill for your use case. 

As always, it's up to you to decide what's right for your application. But don't think you *have* to manage them separately.

More clearly:

- **Execution state**: current step, next step, waiting status, retry counts, etc. 
- **Business state**: What's happened in the agent workflow so far (e.g. list of OpenAI messages, list of tool calls and results, etc.)

If possible, SIMPLIFY - unify these as much as possible. 

[![155-unify-state](https://github.com/humanlayer/12-factor-agents/blob/main/img/155-unify-state-animation.gif)](https://github.com/user-attachments/assets/e5a851db-f58f-43d8-8b0c-1926c99fc68d)


<details>
<summary><a href="https://github.com/humanlayer/12-factor-agents/blob/main/img/155-unify-state-animation.gif">GIF Version</a></summary>

![155-unify-state](https://github.com/humanlayer/12-factor-agents/blob/main/img/155-unify-state-animation.gif)

</details>

In reality, you can engineer your application so that you can infer all execution state from the context window. In many cases, execution state (current step, waiting status, etc.) is just metadata about what has happened so far.

You may have things that can't go in the context window, like session ids, password contexts, etc, but your goal should be to minimize those things. By embracing [factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) you can control what actually goes into the LLM.

This approach has several benefits:

1. **Simplicity**: One source of truth for all state
2. **Serialization**: The thread is trivially serializable/deserializable
3. **Debugging**: The entire history is visible in one place
4. **Flexibility**: Easy to add new state by just adding new event types
5. **Recovery**: Can resume from any point by just loading the thread
6. **Forking**: Can fork the thread at any point by copying some subset of the thread into a new context / state ID
7. **Human Interfaces and Observability**: Trivial to convert a thread into a human-readable markdown or a rich Web app UI

[← Tools Are Structured Outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) | [Launch/Pause/Resume →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md)


================================================
FILE: content/factor-06-launch-pause-resume.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 6. Launch/Pause/Resume with simple APIs

Agents are just programs, and we have things we expect from how to launch, query, resume, and stop them.

[![pause-resume animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/165-pause-resume-animation.gif)](https://github.com/user-attachments/assets/feb1a425-cb96-4009-a133-8bd29480f21f)

<details>
<summary><a href="https://github.com/humanlayer/12-factor-agents/blob/main/img/165-pause-resume-animation.gif">GIF Version</a></summary>

![pause-resume animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/165-pause-resume-animation.gif)

</details>


It should be easy for users, apps, pipelines, and other agents to launch an agent with a simple API.

Agents and their orchestrating deterministic code should be able to pause an agent when a long-running operation is needed.

External triggers like webhooks should enable agents to resume from where they left off without deep integration with the agent orchestrator.

Closely related to [factor 5 - unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) and [factor 8 - own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md), but can be implemented independently.


**Note** - often AI orchestrators will allow for pause and resume, but not between the moment of tool selection and tool execution. See also [factor 7 - contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) and [factor 11 - trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md).

[← Unify Execution State](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) | [Contact Humans With Tools →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)

================================================
FILE: content/factor-07-contact-humans-with-tools.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 7. Contact humans with tool calls

By default, LLM APIs rely on a fundamental HIGH-STAKES token choice: Are we returning plaintext content, or are we returning structured data?

![170-contact-humans-with-tools](https://github.com/humanlayer/12-factor-agents/blob/main/img/170-contact-humans-with-tools.png)

You're putting a lot of weight on that choice of first token, which, in the `the weather in tokyo` case, is

> "the"

but in the `fetch_weather` case, it's some special token to denote the start of a JSON object.

> |JSON>

You might get better results by having the LLM *always* output JSON, and then declare its intent with some natural language tokens like `request_human_input` or `done_for_now` (as opposed to a "proper" tool like `check_weather_in_city`).

Again, you might not get any performance boost from this, but you should experiment, and ensure you're free to try weird stuff to get the best results.

```python

class Options:
  urgency: Literal["low", "medium", "high"]
  format: Literal["free_text", "yes_no", "multiple_choice"]
  choices: List[str]

# Tool definition for human interaction
class RequestHumanInput:
  intent: "request_human_input"
  question: str
  context: str
  options: Options

# Example usage in the agent loop
if nextStep.intent == 'request_human_input':
  thread.events.append({
    type: 'human_input_requested',
    data: nextStep
  })
  thread_id = await save_state(thread)
  await notify_human(nextStep, thread_id)
  return # Break loop and wait for response to come back with thread ID
else:
  # ... other cases
```

Later, you might receive a webhook from a system that handles slack, email, sms, or other events.

```python

@app.post('/webhook')
def webhook(req: Request):
  thread_id = req.body.threadId
  thread = await load_state(thread_id)
  thread.events.append({
    type: 'response_from_human',
    data: req.body
  })
  # ... simplified for brevity, you likely don't want to block the web worker here
  next_step = await determine_next_step(thread_to_prompt(thread))
  thread.events.append(next_step)
  result = await handle_next_step(thread, next_step)
  # todo - loop or break or whatever you want

  return {"status": "ok"}
```

The above includes patterns from [factor 5 - unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md), [factor 8 - own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md), [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md), and [factor 4 - tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md), and several others.

If we were using the XML-y format from [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md), our context window after a few turns might look like this:

```xml

(snipped for brevity)

<slack_message>
    From: @alex
    Channel: #deployments
    Text: Can you deploy backend v1.2.3 to production?
    Thread: []
</slack_message>

<request_human_input>
    intent: "request_human_input"
    question: "Would you like to proceed with deploying v1.2.3 to production?"
    context: "This is a production deployment that will affect live users."
    options: {
        urgency: "high"
        format: "yes_no"
    }
</request_human_input>

<human_response>
    response: "yes please proceed"
    approved: true
    timestamp: "2024-03-15T10:30:00Z"
    user: "alex@company.com"
</human_response>

<deploy_backend>
    intent: "deploy_backend"
    tag: "v1.2.3"
    environment: "production"
</deploy_backend>

<deploy_backend_result>
    status: "success"
    message: "Deployment v1.2.3 to production completed successfully."
    timestamp: "2024-03-15T10:30:00Z"
</deploy_backend_result>
```


Benefits:

1. **Clear Instructions**: Tools for different types of human contact allow for more specificity from the LLM
2. **Inner vs Outer Loop**: Enables agent workflows **outside** of the traditional ChatGPT-style interface, where the control flow and context initialization may be `Agent->Human` rather than `Human->Agent` (think, agents kicked off by a cron or an event)
3. **Multiple Human Access**: Can easily track and coordinate input from different humans through structured events
4. **Multi-Agent**: Simple abstraction can be easily extended to support `Agent->Agent` requests and responses
5. **Durable**: Combined with [factor 6 - launch/pause/resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md), this makes for durable, reliable, and introspectable multiplayer workflows

[More on Outer Loop Agents over here](https://theouterloop.substack.com/p/openais-realtime-api-is-a-step-towards)

![175-outer-loop-agents](https://github.com/humanlayer/12-factor-agents/blob/main/img/175-outer-loop-agents.png)

Works great with [factor 11 - trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md)

[← Launch/Pause/Resume](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) | [Own Your Control Flow →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md)


================================================
FILE: content/factor-08-own-your-control-flow.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 8. Own your control flow

If you own your control flow, you can do lots of fun things.

![180-control-flow](https://github.com/humanlayer/12-factor-agents/blob/main/img/180-control-flow.png)


Build your own control structures that make sense for your specific use case. Specifically, certain types of tool calls may be reason to break out of the loop and wait for a response from a human or another long-running task like a training pipeline. You may also want to incorporate custom implementation of:

- summarization or caching of tool call results
- LLM-as-judge on structured output
- context window compaction or other [memory management](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
- logging, tracing, and metrics
- client-side rate limiting
- durable sleep / pause / "wait for event"


The below example shows three possible control flow patterns:


- request_clarification: model asked for more info, break the loop and wait for a response from a human
- fetch_open_issues: model asked for a list of open issues, fetch the issues, append to context window, and pass straight back to the model
- create_issue: model asked to create an issue, this is a high-stakes thing, so break the loop and wait for human approval

```python
def handle_next_step(thread: Thread):

  while True:
    next_step = await determine_next_step(thread_to_prompt(thread))
    
    # inlined for clarity - in reality you could put 
    # this in a method, use exceptions for control flow, or whatever you want
    if next_step.intent == 'request_clarification':
      thread.events.append({
        type: 'request_clarification',
        data: next_step,
      })

      await send_message_to_human(next_step)
      await db.save_thread(thread)
      # async step - break the loop, we'll get a webhook later
      break
    elif next_step.intent == 'fetch_open_issues':
      thread.events.append({
        type: 'fetch_open_issues',
        data: next_step,
      })

      issues = await linear_client.issues()

      thread.events.append({
        type: 'fetch_open_issues_result',
        data: issues,
      })
      # sync step - pass the new context to the LLM to determine the NEXT next step
      continue
    elif next_step.intent == 'create_issue':
      thread.events.append({
        type: 'create_issue',
        data: next_step,
      })

      await request_human_approval(next_step)
      await db.save_thread(thread)
      # async step - break the loop, we'll get a webhook later
      break
```

This pattern allows you to interrupt and resume your agent's flow as needed, creating more natural conversations and workflows.

**Example** - the number one feature request I have for every AI framework out there is we need to be able to interrupt 
a working agent and resume later, ESPECIALLY between the moment of tool **selection** and the moment of tool **invocation**.

Without this level of resumability/granularity, there's no way to review/approve the tool call before it runs, which means
you're forced to either:

1. Pause the task in memory while waiting for the long-running thing to complete (think `while...sleep`) and restart it from the beginning if the process is interrupted
2. Restrict the agent to only low-stakes, low-risk calls like research and summarization
3. Give the agent access to do bigger, more useful things, and just yolo hope it doesn't screw up


You may notice this is closely related to [factor 5 - unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) and [factor 6 - launch/pause/resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md), but can be implemented independently.

[← Contact Humans With Tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) | [Compact Errors →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md)


================================================
FILE: content/factor-09-compact-errors.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 9. Compact Errors into Context Window

This one is a little short but is worth mentioning. One of the benefits of agents is "self-healing" - for short tasks, an LLM might call a tool that fails. Good LLMs have a fairly good chance of reading an error message or stack trace and figuring out what to change in a subsequent tool call.


Most frameworks implement this, but you can do JUST THIS without doing any of the other 11 factors. Here's an example: 


```python
thread = {"events": [initial_message]}

while True:
  next_step = await determine_next_step(thread_to_prompt(thread))
  thread["events"].append({
    "type": next_step.intent,
    "data": next_step,
  })
  try:
    result = await handle_next_step(thread, next_step) # our switch statement
  except Exception as e:
    # if we get an error, we can add it to the context window and try again
    thread["events"].append({
      "type": 'error',
      "data": format_error(e),
    })
    # loop, or do whatever else here to try to recover
```

You may want to implement an errorCounter for a specific tool call, to limit to ~3 attempts of a single tool, or whatever other logic makes sense for your use case. 

```python
consecutive_errors = 0

while True:

  # ... existing code ...

  try:
    result = await handle_next_step(thread, next_step)
    thread["events"].append({
      "type": next_step.intent + '_result',
      "data": result,
    })
    # success! reset the error counter
    consecutive_errors = 0
  except Exception as e:
    consecutive_errors += 1
    if consecutive_errors < 3:
      # do the loop and try again
      thread["events"].append({
        "type": 'error',
        "data": format_error(e),
      })
    else:
      # break the loop, reset parts of the context window, escalate to a human, or whatever else you want to do
      break
```
Hitting some consecutive-error-threshold might be a great place to [escalate to a human](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md), whether by model decision or via deterministic takeover of the control flow.

[![195-factor-09-errors](https://github.com/humanlayer/12-factor-agents/blob/main/img/195-factor-09-errors.gif)](https://github.com/user-attachments/assets/cd7ed814-8309-4baf-81a5-9502f91d4043)


<details>
<summary><a href="https://github.com/humanlayer/12-factor-agents/blob/main/img/195-factor-09-errors.gif">GIF Version</a></summary>

![195-factor-09-errors](https://github.com/humanlayer/12-factor-agents/blob/main/img/195-factor-09-errors.gif)

</details>

Benefits:

1. **Self-Healing**: The LLM can read the error message and figure out what to change in a subsequent tool call
2. **Durable**: The agent can continue to run even if one tool call fails

I'm sure you will find that if you do this TOO much, your agent will start to spin out and might repeat the same error over and over again. 

That's where [factor 8 - own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) and [factor 3 - own your context building](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) come in - you don't need to just put the raw error back on, you can completely restructure how it's represented, remove previous events from the context window, or whatever deterministic thing you find works to get an agent back on track. 

But the number one way to prevent error spin-outs is to embrace [factor 10 - small, focused agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md).

[← Own Your Control Flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) | [Small Focused Agents →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md)


================================================
FILE: content/factor-1-natural-language-to-tool-calls.md
================================================
[Moved to factor-01-natural-language-to-tool-calls.md](./factor-01-natural-language-to-tool-calls.md)


================================================
FILE: content/factor-10-small-focused-agents.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 10. Small, Focused Agents

Rather than building monolithic agents that try to do everything, build small, focused agents that do one thing well. Agents are just one building block in a larger, mostly deterministic system.

![1a0-small-focused-agents](https://github.com/humanlayer/12-factor-agents/blob/main/img/1a0-small-focused-agents.png)

The key insight here is about LLM limitations: the bigger and more complex a task is, the more steps it will take, which means a longer context window. As context grows, LLMs are more likely to get lost or lose focus. By keeping agents focused on specific domains with 3-10, maybe 20 steps max, we keep context windows manageable and LLM performance high.

> #### As context grows, LLMs are more likely to get lost or lose focus

Benefits of small, focused agents:

1. **Manageable Context**: Smaller context windows mean better LLM performance
2. **Clear Responsibilities**: Each agent has a well-defined scope and purpose
3. **Better Reliability**: Less chance of getting lost in complex workflows
4. **Easier Testing**: Simpler to test and validate specific functionality
5. **Improved Debugging**: Easier to identify and fix issues when they occur

### What if LLMs get smarter? 

Do we still need this if LLMs get smart enough to handle 100-step+ workflows?

tl;dr yes. As agents and LLMs improve, they **might** naturally expand to be able to handle longer context windows. This means handling MORE of a larger DAG. This small, focused approach ensures you can get results TODAY, while preparing you to slowly expand agent scope as LLM context windows become more reliable. (If you've refactored large deterministic code bases before, you may be nodding your head right now).

[![gif](https://github.com/humanlayer/12-factor-agents/blob/main/img/1a5-agent-scope-grow.gif)](https://github.com/user-attachments/assets/0cd3f52c-046e-4d5e-bab4-57657157c82f
)

<details>
<summary><a href="https://github.com/humanlayer/12-factor-agents/blob/main/img/1a5-agent-scope-grow.gif">GIF Version</a></summary>

![gif](https://github.com/humanlayer/12-factor-agents/blob/main/img/1a5-agent-scope-grow.gif)

</details>

Being intentional about size/scope of agents, and only growing in ways that allow you to maintain quality, is key here. As the [team that built NotebookLM put it](https://open.substack.com/pub/swyx/p/notebooklm?selection=08e1187c-cfee-4c63-93c9-71216640a5f8&utm_campaign=post-share-selection&utm_medium=web):

> I feel like consistently, the most magical moments out of AI building come about for me when I'm really, really, really just close to the edge of the model capability

Regardless of where that boundary is, if you can find that boundary and get it right consistently, you'll be building magical experiences. There are many moats to be built here, but as usual, they take some engineering rigor.

[← Compact Errors](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md) | [Trigger From Anywhere →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md)


================================================
FILE: content/factor-11-trigger-from-anywhere.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 11. Trigger from anywhere, meet users where they are

If you're waiting for the [humanlayer](https://humanlayer.dev) pitch, you made it. If you're doing [factor 6 - launch/pause/resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) and [factor 7 - contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md), you're ready to incorporate this factor.

![1b0-trigger-from-anywhere](https://github.com/humanlayer/12-factor-agents/blob/main/img/1b0-trigger-from-anywhere.png)

Enable users to trigger agents from slack, email, sms, or whatever other channel they want. Enable agents to respond via the same channels.

Benefits:

- **Meet users where they are**: This helps you build AI applications that feel like real humans, or at the very least, digital coworkers
- **Outer Loop Agents**: Enable agents to be triggered by non-humans, e.g. events, crons, outages, whatever else. They may work for 5, 20, 90 minutes, but when they get to a critical point, they can contact a human for help, feedback, or approval
- **High Stakes Tools**: If you're able to quickly loop in a variety of humans, you can give agents access to higher stakes operations like sending external emails, updating production data and more. Maintaining clear standards gets you auditability and confidence in agents that [perform bigger better things](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md#what-if-llms-get-smarter)

[← Small Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md) | [Stateless Reducer →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md)

================================================
FILE: content/factor-12-stateless-reducer.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)

### 12. Make your agent a stateless reducer

Okay so we're over 1000 lines of markdown at this point. This one is mostly just for fun.

![1c0-stateless-reducer](https://github.com/humanlayer/12-factor-agents/blob/main/img/1c0-stateless-reducer.png)


![1c5-agent-foldl](https://github.com/humanlayer/12-factor-agents/blob/main/img/1c5-agent-foldl.png)

[← Trigger From Anywhere](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) | [Appendix - Pre-Fetch Context →](https://github.com/humanlayer/12-factor-agents/blob/main/content/appendix-13-pre-fetch.md)


================================================
FILE: content/factor-2-own-your-prompts.md
================================================
[Moved to factor-02-own-your-prompts.md](./factor-02-own-your-prompts.md)


================================================
FILE: content/factor-3-own-your-context-window.md
================================================
[Moved to factor-03-own-your-context-window.md](./factor-03-own-your-context-window.md)


================================================
FILE: content/factor-4-tools-are-structured-outputs.md
================================================
[Moved to factor-04-tools-are-structured-outputs.md](./factor-04-tools-are-structured-outputs.md)


================================================
FILE: content/factor-5-unify-execution-state.md
================================================
[Moved to factor-05-unify-execution-state.md](./factor-05-unify-execution-state.md)


================================================
FILE: content/factor-6-launch-pause-resume.md
================================================
[Moved to factor-06-launch-pause-resume.md](./factor-06-launch-pause-resume.md)


================================================
FILE: content/factor-7-contact-humans-with-tools.md
================================================
[Moved to factor-07-contact-humans-with-tools.md](./factor-07-contact-humans-with-tools.md)


================================================
FILE: content/factor-8-own-your-control-flow.md
================================================
[Moved to factor-08-own-your-control-flow.md](./factor-08-own-your-control-flow.md)


================================================
FILE: content/factor-9-compact-errors.md
================================================
[Moved to factor-09-compact-errors.md](./factor-09-compact-errors.md)


================================================
FILE: drafts/a2h-spec.md
================================================
# A2H - The Agent-to-Human Protocol


## Overview

A2H is a service that allows an agent to request human interaction


## Why another protocol?

MCP and A2A are not enough

## Shoulds

- Clients should respect A2H_BASE_URL and A2H_API_KEY environment variables if set, to allow for simple oauth2-based authentication to REST services.

## Core Protocol 

### Scopes 

The A2H protocol supports two scopes:

- The agent side, APIs consumed by an agent to request human interaction
- The (Optional) admin side, APIs consumed by an admin or web application to manage humans and their contact channels

This separation allows for agents to query and find humans to contact, without exposing the human's contact details to the agent. It is the responsibility of the A2H provider to relay agent requests to the appropriate human via that human's preferred contact channel(s).

### Objects

```
apiVersion: proto.a2h.dev/v1alpha1
kind: Message
metadata:
  uid: "123"
spec: # spec sent by agent
  message: "" # message from the agent
  response_schema:
   # optional, json schema for the response,
  channel_id: 
status: # status resolved by a2h server
  humanMessage: "" # message from the human
  response:
    # optional, matches spec schema
```

```
apiVersion: proto.a2h.dev/v1alpha1
kind: NewConversation
metadata:
  uid: "abc"
spec: # spec sent by a2h server
  message: "" # message from the agent
  channel_id: "123" # channel id to use for future conversations
  response_schema:
   # optional, json schema for the response,
```


#### HumanContact

```json
{
  "run_id": "run_123",
  "call_id": "call_456",
  "spec": {
    "msg": "I've tried using the tool to refund the customer but it's returning a 500 error. Can you help?",
    "channel": {
      "slack": {
        "channel_or_user_id": "U1234567890",
        "context_about_channel_or_user": "Support team lead"
      }
    }
  }
}
```

A HumanContact represents a request for human interaction. It contains:

- `run_id` (string): Unique identifier for the run
- `call_id` (string): Unique identifier for the contact request
- `spec` (HumanContactSpec): The specification for the contact request
- `status` (HumanContactStatus, optional): The current status of the contact request

The HumanContactSpec contains:
- `msg` (string): The message to send to the human
- `subject` (string, optional): Subject of the contact request
- `channel` (ContactChannel, optional): The channel to use for contact
- `response_options` (ResponseOption[], optional): Available response options
- `state` (object, optional): Additional state information

The HumanContactStatus contains:
- `requested_at` (datetime, optional): When the contact was requested
- `responded_at` (datetime, optional): When the human responded
- `response` (string, optional): The human's response
- `response_option_name` (string, optional): Name of the selected response option
- `slack_message_ts` (string, optional): Slack message timestamp if applicable
- `failed_validation_details` (object, optional): Details if validation failed

#### FunctionCall

Example:
```json
{
  "run_id": "run_789",
  "call_id": "call_101",
  "spec": {
    "fn": "process_payment",
    "kwargs": {
      "amount": 100.00,
      "currency": "USD",
      "recipient": "merchant_123"
    },
    "channel": {
      "email": {
        "address": "ap@example.com"
      }
    }
  },
  "status": {
    "requested_at": "2024-03-20T11:00:00Z",
    "responded_at": "2024-03-20T11:02:00Z",
    "approved": true,
    "comment": "Payment looks good, approved",
    "user_info": {
      "name": "John Doe",
      "role": "Finance Manager"
    },
    "slack_message_ts": "1234567890.123457"
  }
}
```

A FunctionCall represents a request for human approval of a function execution. It contains:

- `run_id` (string): Unique identifier for the run
- `call_id` (string): Unique identifier for the function call
- `spec` (FunctionCallSpec): The specification for the function call
- `status` (FunctionCallStatus, optional): The current status of the function call

The FunctionCallSpec contains:
- `fn` (string): The function to be called
- `kwargs` (object): The keyword arguments for the function
- `channel` (ContactChannel, optional): The channel to use for contact
- `reject_options` (ResponseOption[], optional): Available rejection options
- `state` (object, optional): Additional state information

The FunctionCallStatus contains:
- `requested_at` (datetime, optional): When the approval was requested
- `responded_at` (datetime, optional): When the human responded
- `approved` (boolean, optional): Whether the function call was approved
- `comment` (string, optional): Any comment from the human
- `user_info` (object, optional): Information about the responding user
- `slack_context` (object, optional): Slack-specific context
- `reject_option_name` (string, optional): Name of the selected rejection option
- `slack_message_ts` (string, optional): Slack message timestamp if applicable
- `failed_validation_details` (object, optional): Details if validation failed

#### ContactChannel

Example:
```json
{
  "slack": {
    "channel_or_user_id": "U1234567890",
    "context_about_channel_or_user": "Support team lead",
    "allowed_responder_ids": ["U1234567890", "U2345678901"],
    "experimental_slack_blocks": true,
    "thread_ts": "1234567890.123456"
  }
}
```

or

```json
{
    "email": {
        "address": "ap@example.com",
        "context_about_user": "Accounts Payable",
        "in_reply_to_message_id": "1234567890",
        "references_message_id": "1234567890",
        "template": "<html><body>...</body></html>"
    }
}
```

A ContactChannel represents a channel through which a human can be contacted. The protocol supports several channel types:

1. SlackContactChannel:
   - `channel_or_user_id` (string): The Slack channel or user ID
   - `context_about_channel_or_user` (string, optional): Additional context
   - `bot_token` (string, optional): Bot token for authentication
   - `allowed_responder_ids` (string[], optional): IDs of allowed responders
   - `experimental_slack_blocks` (boolean, optional): Enable experimental blocks
   - `thread_ts` (string, optional): Thread timestamp for threaded messages

2. SMSContactChannel:
   - `phone_number` (string): The phone number to contact
   - `context_about_user` (string, optional): Additional context about the user

3. WhatsAppContactChannel:
   - `phone_number` (string): The phone number to contact
   - `context_about_user` (string, optional): Additional context about the user

#### Human (Agent Side)

From the agent's perspective, a human is an object that has a name and description.

#### Human (Admin Side)

From the admin's perspective, a human is an object that has a name, description, and a list of prioritized contact channels, with details 

### Agent Endpoints


#### POST /human_contacts

#### GET /human_contacts/:call_id

#### POST /function_calls

#### GET /function_calls/:call_id


## Extended Protocol

- Admin Humans
- Agent Humans Get
- Agent Humans Search
- Agent Channels List
- Agent Channels validate

### Objects

#### Human (Agent Side)

From the agent's perspective, a human is an object that has a name and description.

#### Human (Admin Side)

From the admin's perspective, a human is an object that has a name, description, and a list of prioritized contact channels, with details 

### Agent Endpoints

#### GET /channels 

return what contact channels are available and their supported fields

example response:

```json
{
    "channels": {
        "slack": {
            "channelOrUserId": {
                "type": "string",
                "description": "The Slack channel or user ID to send messages to"
            },
            "contextAboutChannelOrUser": {
                "type": "string", 
                "description": "Additional context about the Slack channel or user"
            }
        },
        "email": {
            "address": {
                "type": "string",
                "description": "Email address to send messages to"
            },
            "contextAboutUser": {
                "type": "string",
                "description": "Additional context about the email recipient"
            },
            "inReplyToMessageId": {
                "type": "string",
                "description": "The message ID of the email to reply to"
            },
            "referencesMessageId": {
                "type": "string",
                "description": "The message ID of the email to reference"
            }
        }
    }
}
```

#### GET /humans

return a list of humans that are available to interact with

example response:

```json
{
    "humans": [
        {
            "id": "654",
            "name": "Jane Doe",
            "description": "Jane Doe is a human who knows about technology and entrepreneurship"
        },
        {
            "id": "123",
            "name": "John Doe",
            "description": "John Doe is a human who knows about sales and marketing"
        }
    ]
}
```

#### GET /humans/search?q=

search for humans by name or description

example response:

```json
{
    "humans": [
        {
            "id": "654",
            "name": "Jane Doe",
            "description": "Jane Doe is a human who knows about technology and entrepreneurship"
        }
    ]
}
```

### Administrative Endpoints


#### POST /humans

Enroll a new human for agent contact

example request:

```json
{
    "name": "John Doe",
    "description": "John Doe is a human who knows about sales and marketing",
    "prioritizedContactChannels": [
        {
            "slack": {
                "channelOrUserId": "U1234567890"
            }
        },
        {
            "email": {
                "address": "john.doe@example.com"
            }
        }
    ]
}
```


#### GET /humans/:id

Get a human by id

example response:

```json

================================================
FILE: drafts/ah2-openapi.json
================================================


================================================
FILE: hack/contributors_markdown/.python-version
================================================
3.13


================================================
FILE: hack/contributors_markdown/README.md
================================================


================================================
FILE: hack/contributors_markdown/contributors_markdown.py
================================================
#!/usr/bin/env python3
"""
Generate a Markdown grid of contributor avatars for a GitHub repository.

Usage:
    python contributors_markdown.py --repo owner/name [--token GH_TOKEN] [--cols 7] [--image_size 80] [--output FILE]

Arguments:
  --repo         GitHub repository in "owner/name" form (e.g. "octocat/Hello-World")
  --token        Personal access token with `public_repo` scope (or `repo` for private).
                 Can also be provided via the GITHUB_TOKEN environment variable.
  --cols         Number of avatars per row in the generated grid (default 7).
  --image_size   Pixel width for avatars (GitHub automatically resizes; default 80).
  --output       File to write the Markdown grid into (default: stdout, use '-' for stdout).

The generated file contains a Markdown table‑less grid of linked avatars that can
be embedded in README.md or any other Markdown document.
"""

from __future__ import annotations
import argparse
import os
import sys
import textwrap
from typing import List, Dict

import requests

API_URL_TEMPLATE = "https://api.github.com/repos/{owner}/{repo}/contributors"


def fetch_contributors(owner: str, repo: str, token: str | None, per_page: int = 100) -> List[Dict]:
    """Return a list of contributor objects from the GitHub REST API."""
    headers = {"Accept": "application/vnd.github+json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    contributors: List[Dict] = []
    page = 1
    while True:
        url = f"{API_URL_TEMPLATE.format(owner=owner, repo=repo)}?per_page={per_page}&page={page}"
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        batch = response.json()
        if not batch:
            break
        contributors.extend(batch)
        if len(batch) < per_page:
            break
        page += 1
    return contributors


def build_markdown(contributors: List[Dict], cols: int = 7, image_size: int = 80) -> str:
    """Return a Markdown fragment containing a grid of linked avatar images.

    Each contributor becomes one ``[<img .../>](profile)`` cell. Cells are
    joined with single spaces into rows of ``cols`` entries, and rows are
    separated by a blank line.

    Args:
        contributors: Contributor objects; each must provide the keys
            ``login``, ``avatar_url`` and ``html_url``.
        cols: Number of cells per row. A trailing partial row is kept.
        image_size: Value used for both the ``s`` size query parameter of the
            avatar URL and the ``width`` attribute of the image.

    Returns:
        The Markdown text, or an empty string when ``contributors`` is empty.

    Raises:
        KeyError: If a contributor object lacks one of the required keys.
    """
    lines: List[str] = []
    row: List[str] = []

    for contributor in contributors:
        login = contributor["login"]
        avatar_url = contributor["avatar_url"]
        # Append the size as a query parameter. Use "?" when the URL has no
        # query string yet, so the result stays a well-formed URL either way.
        separator = "&" if "?" in avatar_url else "?"
        avatar = f"{avatar_url}{separator}s={image_size}"
        profile = contributor["html_url"]
        cell = f'[<img src="{avatar}" width="{image_size}px" alt="{login}" />]({profile})'
        row.append(cell)
        if len(row) == cols:
            lines.append(" ".join(row))
            row = []

    # Flush the last, partially filled row.
    if row:
        lines.append(" ".join(row))

    return "\n\n".join(lines)


def main() -> None:
    """Command-line entry point.

    Parses the arguments, resolves the token from ``--token`` or the
    ``GITHUB_TOKEN`` environment variable, downloads the contributor list,
    renders the avatar grid and writes it (preceded by a fixed header) to
    stdout or to the file named by ``--output``.

    Exits through ``parser.error`` when the token is missing or ``--repo``
    has no slash, and through ``sys.exit`` with a message when no
    contributors were returned.
    """
    cli = argparse.ArgumentParser(description="Generate a Markdown grid of contributor avatars")
    cli.add_argument("--repo", required=True, help="GitHub repo in owner/name form")
    cli.add_argument("--token", help="GitHub Personal Access Token (or set GITHUB_TOKEN env)")
    cli.add_argument("--cols", type=int, default=7, help="Number of avatars per row (default 7)")
    cli.add_argument("--image_size", type=int, default=80, help="Avatar size in px (default 80)")
    cli.add_argument("--output", "-o", default="-", help="Output file (default: stdout, use '-' for stdout)")
    options = cli.parse_args()

    # The command-line flag wins over the environment variable.
    token = options.token or os.getenv("GITHUB_TOKEN")
    if not token:
        cli.error("A GitHub token must be supplied via --token or GITHUB_TOKEN env var.")

    if "/" not in options.repo:
        cli.error("--repo must be in 'owner/name' form")
    # Split on the first slash only; anything after it belongs to the name.
    owner, _, repo = options.repo.partition("/")

    contributors = fetch_contributors(owner, repo, token)
    if not contributors:
        sys.exit("No contributors found. Is the repository correct and does the token have access?")

    grid = build_markdown(contributors, cols=options.cols, image_size=options.image_size)

    header = textwrap.dedent(
        f"""
        <!-- AUTO-GENERATED BY generate_contributors_grid.py -->
        ## Contributors
        Thanks to these wonderful people:\n
        """
    )
    document = header + grid + "\n"

    if options.output == "-":
        sys.stdout.write(document)
        return

    with open(options.output, "w", encoding="utf-8") as out_file:
        out_file.write(document)
    # Status goes to stderr so it never mixes with generated Markdown.
    print(f"Wrote {len(contributors)} contributors to {options.output}", file=sys.stderr)


if __name__ == "__main__":
    main()


================================================
FILE: hack/contributors_markdown/pyproject.toml
================================================
[project]
name = "contributors-markdown"
version = "0.1.0"
description = "Generate a Markdown grid of contributor avatars for a GitHub repository"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
    "requests>=2.32.3",
]


================================================
FILE: packages/create-12-factor-agent/template/.gitignore
================================================
baml_client/
node_modules/
.threads/


================================================
FILE: packages/create-12-factor-agent/template/README.md
================================================
# Chapter 0 - Hello World

Let's start with a basic TypeScript setup and a hello world program.

This guide is written in TypeScript (yes, a python version is coming soon)

There are many checkpoints between every file edit in the workshop steps,
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.

To run this guide, you'll need a relatively recent version of nodejs and npm installed

You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine


    brew install node@20

You should see the node version

    node --version

Copy initial package.json

    cp ./walkthrough/00-package.json package.json

Install dependencies

    npm install

Copy tsconfig.json

    cp ./walkthrough/00-tsconfig.json tsconfig.json

add .gitignore

    cp ./walkthrough/00-.gitignore .gitignore

Create src folder

    mkdir -p src

Add a simple hello world index.ts

    cp ./walkthrough/00-index.ts src/index.ts

Run it to verify

    npx tsx src/index.ts

You should see:

    hello, world!


# Chapter 1 - CLI and Agent Loop

Now let's add BAML and create our first agent with a CLI interface.

First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.


    npm install @boundaryml/baml

Initialize BAML

    npx baml-cli init

Remove default resume.baml

    rm baml_src/resume.baml

Add our starter agent, a single baml prompt that we'll build on

    cp ./walkthrough/01-agent.baml baml_src/agent.baml

Generate BAML client code

    npx baml-cli generate

Enable BAML logging for this section

    export BAML_LOG=debug

Add the CLI interface

    cp ./walkthrough/01-cli.ts src/cli.ts

Update index.ts to use the CLI

    cp ./walkthrough/01-index.ts src/index.ts

Add the agent implementation

    cp ./walkthrough/01-agent.ts src/agent.ts

The BAML code is configured to use BASETEN_API_KEY by default

To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co),
and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/).

```rust 
  function DetermineNextStep(thread: string) -> DoneForNow {
      client Qwen3
      // ...
```

If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key.

If you want to try swapping out the model, you can change the `client` line.

[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)

For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) 
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.

For example, to use openai with an OPENAI_API_KEY, you can do:

    client "openai/gpt-4o"


Set your env vars

    export BASETEN_API_KEY=...
    export BASETEN_BASE_URL=...

Try it out

    npx tsx src/index.ts hello

you should see a familiar response from the model

    {
      intent: 'done_for_now',
      message: 'Hello! How can I assist you today?'
    }


# Chapter 2 - Add Calculator Tools

Let's add some calculator tools to our agent.

Let's start by adding a tool definition for the calculator

These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.


    cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml

Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps


    cp ./walkthrough/02-agent.baml baml_src/agent.baml

Generate updated BAML client

    npx baml-cli generate

Try out the calculator

    npx tsx src/index.ts 'can you add 3 and 4'

You should see a tool call to the calculator

    {
      intent: 'add',
      a: 3,
      b: 4
    }


# Chapter 3 - Process Tool Calls in a Loop

Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.

First, lets update the agent to handle the tool call


    cp ./walkthrough/03-agent.ts src/agent.ts

Now, lets try it out


    npx tsx src/index.ts 'can you add 3 and 4'

you should see the agent call the tool and then return the result

    {
      intent: 'done_for_now',
      message: 'The sum of 3 and 4 is 7.'
    }

For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output

    export BAML_LOG=off

Try a multi-step calculation

    npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'

you'll notice that tools like multiply and divide are not available

    npx tsx src/index.ts 'can you multiply 3 and 4'

next, let's add handlers for the rest of the calculator tools


    cp ./walkthrough/03b-agent.ts src/agent.ts

Test subtraction

    npx tsx src/index.ts 'can you subtract 3 from 4'

now, let's test the multiplication tool


    npx tsx src/index.ts 'can you multiply 3 and 4'

finally, let's test a more complex calculation with multiple operations


    npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

congratulations, you've taken your first step into hand-rolling an agent loop.

from here, we're going to start incorporating some more intermediate and advanced
concepts for 12-factor agents.


# Chapter 4 - Add Tests to agent.baml

Let's add some tests to our BAML agent.

to start, leave the baml logs enabled

    export BAML_LOG=debug

next, let's add some tests to the agent

We'll start with a simple test that checks the agent's ability to handle
a basic calculation.


    cp ./walkthrough/04-agent.baml baml_src/agent.baml

Run the tests

    npx baml-cli test

now, let's improve the test with assertions!

Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.


    cp ./walkthrough/04b-agent.baml baml_src/agent.baml

Run the tests

    npx baml-cli test

as you add more tests, you can disable the logs to keep the output clean.
You may want to turn them on as you iterate on specific tests.


    export BAML_LOG=off

now, let's add some more complex test cases,
where we resume from in the middle of an in-progress
agentic context window


    cp ./walkthrough/04c-agent.baml baml_src/agent.baml

let's try to run it


    npx baml-cli test


# Chapter 5 - Multiple Human Tools

In this section, we'll add support for multiple tools that serve to
contact humans.


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

first, let's add a tool that can request clarification from a human

this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.


    cp ./walkthrough/05-agent.baml baml_src/agent.baml

next, let's re-generate the client code

NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.


    npx baml-cli generate

now, let's update the agent to use the new tool


    cp ./walkthrough/05-agent.ts src/agent.ts

next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI


    cp ./walkthrough/05-cli.ts src/cli.ts

let's try it out


    npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '

next, let's add a test that checks the agent's ability to handle
a clarification request


    cp ./walkthrough/05b-agent.baml baml_src/agent.baml

and now we can run the tests again


    npx baml-cli test

you'll notice the new test passes, but the hello world test fails

This is because the agent's default behavior is to return "done_for_now"


    cp ./walkthrough/05c-agent.baml baml_src/agent.baml

Verify tests pass

    npx baml-cli test


# Chapter 6 - Customize Your Prompt with Reasoning

In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.

this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)

there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)


for this section, it will be helpful to leave the baml logs enabled

    export BAML_LOG=debug

update the agent prompt to include a reasoning step


    cp ./walkthrough/06-agent.baml baml_src/agent.baml

generate the updated client

    npx baml-cli generate

now, you can try it out with a simple prompt


    npx tsx src/index.ts 'can you multiply 3 and 4'

you should see output from the baml logs showing the reasoning steps

#### optional challenge

add a field to your tool output format that includes the reasoning steps in the output!


# Chapter 7 - Customize Your Context Window

In this section, we'll explore how to customize the context window
of the agent.

this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)


update the agent to pretty-print the Context window for the model


    cp ./walkthrough/07-agent.ts src/agent.ts

Test the formatting

    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

next, let's update the agent to use XML formatting instead

this is a very popular format for passing data to a model,

among other things, because of the token efficiency of XML.


    cp ./walkthrough/07b-agent.ts src/agent.ts

let's try it out


    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

lets update our tests to match the new output format


    cp ./walkthrough/07c-agent.baml baml_src/agent.baml

check out the updated tests


    npx baml-cli test


# Chapter 8 - Adding API Endpoints

Add an Express server to expose the agent via HTTP.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install Express and types

    npm install express && npm install --save-dev @types/express supertest

Add the server implementation

    cp ./walkthrough/08-server.ts src/server.ts

Start the server

    npx tsx src/server.ts

Test with curl (in another terminal)

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you add 3 and 4"}'

You should get an answer from the agent which includes the
agentic trace, ending in a message like:


    {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}


# Chapter 9 - In-Memory State and Async Clarification

Add state management and async clarification support.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Add some simple in-memory state management for threads

    cp ./walkthrough/09-state.ts src/state.ts

update the server to use the state management

* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id
* implement POST /thread/:id/response


    cp ./walkthrough/09-server.ts src/server.ts

Start the server

    npx tsx src/server.ts

Test clarification flow

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you multiply 3 and xyz"}'


# Chapter 10 - Adding Human Approval

Add support for human approval of operations.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

update the server to handle human approvals

* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong


    cp ./walkthrough/10-server.ts src/server.ts

Add a few methods to the agent to handle approvals and responses

    cp ./walkthrough/10-agent.ts src/agent.ts

Start the server

    npx tsx src/server.ts

Test division with approval

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you divide 3 by 4"}'

You should see:

    {
      "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
      "events": [
        {
          "type": "user_input",
          "data": "can you divide 3 by 4"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 4,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        }
      ]
    }

reject the request with another curl call, changing the thread ID

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
      -H "Content-Type: application/json" \
      -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'

You should see: the last tool call is now `"intent":"divide","a":3,"b":5`

    {
      "events": [
        {
          "type": "user_input",
          "data": "can you divide 3 by 4"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 4,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        },
        {
          "type": "tool_response",
          "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 5,
            "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
          }
        }
      ]
    }

now you can approve the operation

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
      -H "Content-Type: application/json" \
      -d '{"type": "approval", "approved": true}'

you should see the final message includes the tool response and final result!

    ...
    {
      "type": "tool_response",
      "data": 0.5
    },
    {
      "type": "done_for_now",
      "message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!",
      "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
    }


# Chapter 11 - Human Approvals over email

in this section, we'll add support for human approvals over email.

This will start a little bit contrived, just to get the concepts down -

We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI

While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install HumanLayer

    npm install humanlayer

Update CLI to send `divide` and `request_more_information` to a human via email

    cp ./walkthrough/11-cli.ts src/cli.ts

Run the CLI

    npx tsx src/index.ts 'can you divide 4 by 5'

The last line of your program should mention human review step

    nextStep { intent: 'divide', a: 4, b: 5 }
    HumanLayer: Requested human approval from HumanLayer cloud

go ahead and respond to the email with some feedback:

![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true)


you should get another email with an updated attempt based on your feedback!

You can go ahead and approve this one:

![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true)


and your final output will look like

    nextStep {
     intent: 'done_for_now',
     message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
    }
    The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!

lets implement the `request_more_information` flow as well


    cp ./walkthrough/11b-cli.ts src/cli.ts

let's test the `request_more_information` flow by asking for a calculation
with garbled input:


    npx tsx src/index.ts 'can you multiply 4 and xyz'

You should get an email with a request for clarification

    Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?

you can respond with something like

    use 8 instead of xyz

you should see a final result on the CLI like

    I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.

as a final step, lets explore using a custom html template for the email


    cp ./walkthrough/11c-cli.ts src/cli.ts

first try with divide:


    npx tsx src/index.ts 'can you divide 4 by 5'

you should see a slightly different email with the custom template

![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true)

feel free to run with the flow and then you can try updating the template to your liking

(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)

try triggering "request_more_information" as well!


thats it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval


# Chapter XX - HumanLayer Webhook Integration

the previous sections used the humanlayer SDK in "synchronous mode" - that
means every time we wait for human approval, we sit in a loop
polling until the human response is received.

That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results.


add code to initialize humanlayer in the server


    cp ./walkthrough/12-1-server-init.ts src/server.ts

next, lets update the /thread endpoint to

1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls


Update the server to be able to handle request_clarification responses

- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response


    cp ./walkthrough/12a-server.ts src/server.ts

Start the server in another terminal

    npx tsx src/server.ts

now that the server is running, send a payload to the '/thread' endpoint


__ do the response step

__ now handle approvals for divide

__ now also handle done_for_now


================================================
FILE: packages/create-12-factor-agent/template/baml_src/agent.baml
================================================
// Structured output the model returns to ask for more information.
// `intent` is the fixed literal "request_more_information" (its description is
// part of the rendered output format); `message` is a required string.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Structured output with the fixed intent literal "done_for_now".
// `message` is described to the model as the message to send to the user
// about the work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Structured output with the fixed intent literal
// "request_approval_from_manager" and a required string `message`.
// ProcessRefund's description tells the model to request this approval before
// processing a refund.
class RequestApprovalFromManager {
  intent "request_approval_from_manager"
  message string
}

// Structured output for a customer refund: order id, amount (int or float)
// and a reason string, with the fixed intent literal "process_refund".
// NOTE(review): the "always request approval from the manager" rule is only
// stated in the @description text sent to the model; whether the runtime
// enforces it is not visible in this file.
class ProcessRefund {
  intent "process_refund" @description("you can process a refund for a customer, always request approval from the manager before processing a refund")
  order_id string
  amount int | float
  reason string
}

// Union aliases grouping the tool classes by category. Together they form the
// return type of DetermineNextStep.
// HumanTools: the classes declared in this file that address a human.
type HumanTools = ClarificationRequest | DoneForNow | RequestApprovalFromManager
// CalculatorTools: the four arithmetic classes declared in tool_calculator.baml.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// CustomerSupportTools: currently a single member.
type CustomerSupportTools = ProcessRefund

// Asks the model for the next step of the agent loop.
//
// Input:  `thread` - the conversation so far, already serialized to a string.
// Output: exactly one of the HumanTools, CalculatorTools or
//         CustomerSupportTools classes.
//
// The call uses the "openai/gpt-4o" client shorthand. The prompt consists of a
// system message, then a user message that embeds the thread, the generated
// output-format description (ctx.output_format), and a closing instruction to
// write out some thinking before the schema-shaped answer.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools | CustomerSupportTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}


// A bare greeting with no task: the model is expected to ask for more
// information (intent "request_more_information").
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single, unambiguous multiplication request: the expected next step is the
// "multiply" tool.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Resumes from a thread in which every requested operation has already run
// (multiply -> 12, divide -> 6, add -> 18). The expected next step is
// "done_for_now" with a message that contains the final value "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// One operand is garbled text: the model is expected to ask for clarification
// (intent "request_more_information") rather than pick a calculator tool.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Resumes after a clarification round trip: the garbled operand was questioned
// and the human answered "lets try 12 instead". The expected next step is
// "multiply" with a == 3 and b == 12.
// Each assertion label names the field it checks, so a failure report points
// at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: packages/create-12-factor-agent/template/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client for model "gpt-4o"; the API key is read from the
// OPENAI_API_KEY environment variable. No retry policy is attached.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client for model "gpt-4o-mini" using the `Exponential` retry policy
// declared later in this file; the API key comes from OPENAI_API_KEY.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client for model "claude-3-5-sonnet-20241022"; the API key is read
// from the ANTHROPIC_API_KEY environment variable. No retry policy is attached.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client for model "claude-3-haiku-20240307" using the `Constant`
// retry policy declared later in this file; the API key comes from
// ANTHROPIC_API_KEY.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads calls across the two smaller models, one from
// each provider, so both OPENAI_API_KEY and ANTHROPIC_API_KEY are needed.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that walks its strategy list until a call succeeds.
// NOTE(review): both entries are CustomGPT4oMini, so the "fallback" only
// repeats the same client (which already carries its own retry policy).
// Confirm whether a different second client, e.g. CustomGPT4o, was intended.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy used by CustomHaiku: up to 3 retries with a fixed 200 ms delay
// between attempts.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy used by CustomGPT4oMini: up to 2 retries with exponential
// backoff configured as a 300 ms delay, a 1.5 multiplier and a 10000 ms
// maximum delay.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: packages/create-12-factor-agent/template/baml_src/generators.baml
================================================
// This helps you auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
// Code generator for this template: emits an async TypeScript client into the
// directory above baml_src/.
// NOTE(review): `version` is pinned to "0.88.0" while the template's
// package.json requests @boundaryml/baml "latest"; the comment on `version`
// below says the two should match - confirm they stay in sync.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.88.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: packages/create-12-factor-agent/template/baml_src/tool_calculator.baml
================================================


// Tool-call shape with the fixed intent literal "add" and two numeric
// operands, each an int or a float.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool-call shape with the fixed intent literal "subtract" and two numeric
// operands, each an int or a float.
// NOTE(review): which operand is subtracted from which is decided by the
// handler, not by this schema - verify against the agent implementation.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool-call shape with the fixed intent literal "multiply" and two numeric
// operands, each an int or a float.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool-call shape with the fixed intent literal "divide" and two numeric
// operands, each an int or a float.
// NOTE(review): nothing in this schema prevents b from being 0; any guard
// would have to live in the handler - confirm there.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: packages/create-12-factor-agent/template/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "@boundaryml/baml": "latest",
        "express": "^5.1.0",
        "humanlayer": "^0.7.7",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0",
        "zod": "^3.25.64"
    },
    "devDependencies": {
        "@types/express": "^5.0.1",
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0",
        "supertest": "^7.1.0"
    }
}


================================================
FILE: packages/create-12-factor-agent/template/src/a2h.ts
================================================
import { z, ZodSchema } from 'zod';

// Types for A2H API objects matching the new schemas

// Common metadata type
// Shared by every A2H object in this file; carries a single string `uid`.
export type Metadata = {
  uid: string;
};

// Message sent by agent to a2h server
// T is the type of the Zod schema that may be attached as `response_schema`.
// Only `agentMessage` is required; the schema and channel id are optional.
type MessageSpec<T extends ZodSchema<any>> = {
  agentMessage: string; // message from the agent
  response_schema?: T; // optional Zod schema for the response
  channel_id?: string; // optional channel id
};

// Envelope for a Message object: fixed `apiVersion` and `kind` literals,
// metadata, the agent-authored `spec`, and an optional `status` holding the
// human's reply. T defaults to a generic Zod schema; `status.response` is
// typed as the value inferred from T.
export type Message<T extends ZodSchema<any> = ZodSchema<any>> = {
  apiVersion: "proto.a2h.dev/v1alpha1";
  kind: "Message";
  metadata: Metadata;
  spec: MessageSpec<T>;
  status?: {
    humanMessage?: string; // message from the human
    response?: T extends ZodSchema<any> ? z.infer<T> : any; // optional, matches spec schema
  };
};

// Zod schema for an approval decision: a required boolean verdict plus an optional free-text comment.
export const ApprovalSchema = z.object({
  approved: z.boolean(),
  comment: z.string().optional(),
});

// A Message whose structured response follows ApprovalSchema.
export type ApprovalRequest = Message<typeof ApprovalSchema>;
// A Message using the default (unconstrained) response schema.
export type HumanRequest = Message;

// Payload of a NewConversation object, sent by the a2h server to the agent
// when a human starts a conversation.
type NewConversationSpec = {
  user_message: string; // message from the human
  channel_id: string; // channel id to use for future conversations
  agent_name?: string; // optional agent name or identifier
  raw?: Record<string, any>; // optional raw data from the request, e.g. email metadata
};

// Envelope for a NewConversation object; `kind` distinguishes it from Message.
export type NewConversation = {
  apiVersion: "proto.a2h.dev/v1alpha1";
  kind: "NewConversation";
  metadata: Metadata;
  spec: NewConversationSpec;
};

// Union of every A2H object defined in this file; discriminate on the `kind` field.
// Extend this union when new object kinds are added.
export type A2HEvent<T extends ZodSchema<any> = ZodSchema<any>> = Message<T> | NewConversation;


================================================
FILE: packages/create-12-factor-agent/template/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * A single entry in a thread's history.
 *
 * `type` names the kind of entry (this file pushes "tool_call" and
 * "tool_response"); `data` is the payload and may be a primitive or an object.
 * When `data` is an object with an `intent` field, that intent is used as the
 * tag when the event is serialized.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered list of events that makes up one conversation with the agent.
 * The thread is the only state the agent loop reads and appends to.
 */
export class Thread {

    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Render the whole thread as the text block that is handed to the LLM. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strip leading spaces/tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Render one event as an XML-like element: the tag is the payload's
     * `intent` when present, otherwise the event type; the body is either the
     * primitive payload or one `key: value` line per payload field
     * (excluding `intent`).
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null` is 'object', so null must be excluded explicitly or
        // Object.keys(null) would throw.
        const isPlainPayload = typeof e.data !== 'object' || e.data === null;
        const body = isPlainPayload
            ? e.data
            : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n");
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /** True when the last event is a message that needs a reply from the human. */
    awaitingHumanResponse(): boolean {
        // Optional chaining keeps this safe for an empty thread or a null payload.
        const intent = this.lastEvent()?.data?.intent;
        return ['request_more_information', 'done_for_now'].includes(intent);
    }

    /** True when the last event is a tool call that requires human approval. */
    awaitingHumanApproval(): boolean {
        return this.lastEvent()?.data?.intent === 'divide';
    }

    /** The most recently appended event (undefined at runtime when the thread is empty). */
    lastEvent(): Event {
        return this.events[this.events.length - 1];
    }
}

// Union of the arithmetic tool calls that handleNextStep can execute; discriminated by `intent`.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and append its result to the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation
 * @param thread   the thread to append the "tool_response" event to (mutated in place)
 * @returns the same thread instance, with the result event appended
 * @throws Error when `nextStep.intent` is not a known calculator operation.
 *         Callers such as the webhook server pass untyped data here, and
 *         without this guard the function would silently resolve to undefined.
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed input; reached only when untyped data is passed in.
            throw new Error(`unknown calculator intent: ${(nextStep as any)?.intent}`);
    }

    // Every operation records its result the same way.
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Inner agent loop: ask the model for the next step, record it on the thread,
 * and either execute it or hand control back to the caller.
 *
 * Returns as soon as the model picks a step that needs a human (a message to
 * the human, a manager approval request, or a `divide` call awaiting approval).
 * Add, subtract and multiply are executed immediately and the loop continues.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const step = await b.DetermineNextStep(serialized);
        console.log("nextStep", step);

        // Record the model's decision before acting on it.
        thread.events.push({ "type": "tool_call", "data": step });

        switch (step.intent) {
            case "add":
            case "subtract":
            case "multiply":
                // safe to run without asking anyone
                thread = await handleNextStep(step, thread);
                break;
            case "divide":
                // divide is scary, return it for human approval
            case "done_for_now":
            case "request_more_information":
            case "request_approval_from_manager":
                // response to human, return the thread
                return thread;
        }
    }
}


================================================
FILE: packages/create-12-factor-agent/template/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event, handleNextStep } from "../src/agent";
import { FileSystemThreadStore } from "./state";
import chalk from "chalk";

const threadStore = new FileSystemThreadStore();

/**
 * Outer loop for the command line: start a thread from `message`, then
 * alternate between running the agent and asking the human on stdin.
 * Runs until the process is interrupted (ctrl+c); the thread is persisted
 * after each agent run and after each human reply.
 */
export async function cliOuterLoop(message: string) {
    // The user's message is the first event of a fresh thread.
    const seedThread = new Thread([{ type: "user_input", data: message }]);
    const threadId = await threadStore.create(seedThread);

    // Optional: exit when the last intent is "done_for_now" and print the
    // final result instead of looping forever.
    for (;;) {
        const current = await agentLoop(seedThread);
        await threadStore.update(threadId, current);

        // Everything is answered on the CLI. To route
        // "request_approval_from_manager" elsewhere, branch on the last
        // event's intent here and call a manager-specific helper instead.
        const reply = await askHumanCLI(current.lastEvent());
        current.events.push(reply);

        await threadStore.update(threadId, current);
    }
}

/**
 * CLI entry point: join the command line arguments into the first user
 * message (defaulting to "hello!") and start the outer loop.
 */
export async function cli() {
    // argv[0] is node and argv[1] is the script; everything after is the message.
    const words = process.argv.slice(2);
    const message = words.length > 0 ? words.join(" ") : "hello!";

    await cliOuterLoop(message);
}

// async function askManager(lastEvent: Event): Promise<Event> {
//     const hl = humanlayer({
//         contactChannel: {
//              email: {
//                 address: process.env.HUMANLAYER_EMAIL_ADDRESS || "manager@example.com"
//             }
//         }
//     })
//     const resp = await hl.fetchHumanResponse({
//         spec: {
//             msg: lastEvent.data.message
//         }
//      })
//      return {
//         type: "manager_response",
//         data: resp
//      }
// }

/**
 * Resolve the agent's last event by asking the human on the terminal.
 *
 * - "divide": ask for approval; run the operation when approved, otherwise
 *   report the denial and the feedback.
 * - message intents: print the agent's message and return what the human typed.
 *
 * @returns a "tool_response" event to append to the thread
 * @throws Error for any other intent
 */
async function askHumanCLI(lastEvent: Event): Promise<Event> {
    const intent = lastEvent.data.intent;

    if (intent === "divide") {
        const prompt = `agent wants to run ${chalk.green(JSON.stringify(lastEvent.data))}\nPress Enter to approve, or type feedback to cancel:`;
        const decision = await approveCLI(prompt);
        if (!decision.approved) {
            return {
                type: "tool_response",
                data: `user denied operation ${lastEvent.data.intent} with feedback: ${decision.comment}`
            };
        }
        // Run the call on a scratch thread and hand back only the result event.
        const scratch = new Thread([lastEvent]);
        const updated = await handleNextStep(lastEvent.data, scratch);
        return updated.events[updated.events.length - 1];
    }

    const isMessageIntent =
        intent === "request_more_information" ||
        intent === "request_approval_from_manager" ||
        intent === "done_for_now";
    if (isMessageIntent) {
        const answer = await messageCLI(lastEvent.data.message);
        return {
            type: "tool_response",
            data: answer
        };
    }

    throw new Error(`unknown tool in outer loop: ${lastEvent.data.intent}`)
}

// Outcome of an approval prompt on the CLI.
// A rejection always carries the feedback the human typed; an approval carries nothing else.
type Approval = {
    approved: true;
} | {
    approved: false;
    comment: string;
}
/**
 * Print `message` followed by a "> " prompt on stdout and resolve with the
 * line the user types on stdin.
 */
async function messageCLI(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    const onAnswer = (resolve: (value: string) => void) => (answer: string) => {
        // Release stdin before handing the answer back.
        rl.close();
        resolve(answer);
    };

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, onAnswer(resolve));
    });
}

/**
 * Ask the user to approve an action on the terminal.
 * An empty answer (just pressing Enter) approves; any other text is treated
 * as a rejection and returned as the feedback comment.
 */
async function approveCLI(message: string): Promise<Approval> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    const answer: string = await new Promise((resolve) => {
        rl.question(`${message}\n> `, (typed: string) => {
            rl.close();
            resolve(typed);
        });
    });

    if (answer.trim() !== '') {
        // Any non-empty response is treated as rejection with feedback
        return { approved: false, comment: answer };
    }
    return { approved: true };
}


// Run the CLI only when this file is executed directly, not when it is imported.
if (require.main === module) {
    // cli() returns a promise; handle a rejection explicitly (as index.ts does)
    // instead of leaving it floating, and signal failure through the exit code.
    cli().catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });
}

================================================
FILE: packages/create-12-factor-agent/template/src/index.ts
================================================
import { cli } from "./cli"

// Program entry point: run the CLI and log any error that escapes it.
const main = async (): Promise<void> => {
    await cli()
}

main().catch(console.error)

================================================
FILE: packages/create-12-factor-agent/template/src/server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop as innerLoop, handleNextStep } from '../src/agent';
import { FileSystemThreadStore, ThreadStore } from '../src/state';
import { ContactChannel, FunctionCall, HumanContact, humanlayer, V1Beta2EmailEventReceived, V1Beta2HumanContactCompleted, V1Beta2SlackEventReceived } from '@humanlayer/sdk';

// Express application shared by every route in this file.
const app = express();
// Parse JSON request bodies before any route handler runs.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Thread persistence used by the webhook handler.
const store = new FileSystemThreadStore();

// Webhook payload shapes accepted by the conversations endpoint.
// Every payload carries `type` as the discriminator and an `event` body that
// includes the contact channel to reply on.

// A human started a new conversation with the agent.
type V1Beta3ConversationCreated = {
    is_test: boolean;
    type: "conversation.created";
    event: {
        user_message: string;
        contact_channel_id: number;
        agent_name: string;
    }
}

// A human contact whose response has been filled in.
type CompletedHumanContact = HumanContact & {
    status: {
        response: string;
    }
}

// A human replied to a message the agent sent earlier.
type V1Beta3HumanContactCompleted = {
    is_test: boolean;
    type: "human_contact.completed";
    event: {
        contact_channel_id: number;
    } & CompletedHumanContact
}

// Approval outcomes for a function call; a rejection always carries a comment.
type Approved = {status: {approved: true}}
type Rejected = {status: {approved: false; comment: string}}

// A function call that has been approved or rejected.
type CompletedFunctionCall = FunctionCall & (Approved | Rejected)

// A human approved or rejected a function call the agent requested.
type V1Beta3FunctionCallCompleted = {
    is_test: boolean;
    type: "function_call.completed";
    event: {
        contact_channel_id: number;
    } & CompletedFunctionCall
}

// Every webhook payload this server handles.
type V1Beta3Event = V1Beta3ConversationCreated | V1Beta3HumanContactCompleted | V1Beta3FunctionCallCompleted;

// Reply with a JSON 404 telling the caller that the referenced thread does not exist.
const notFound = (res: Response) => {
    const payload = {
        error: 'Not Found',
        message: `Thread not found`,
        status: 404
    };
    res.status(payload.status).json(payload);
}

/**
 * Webhook handler: the "outer loop" of the agent.
 *
 * 1. Loads the thread the event belongs to (or starts a new one).
 * 2. Applies the human's answer / approval to the thread.
 * 3. Runs the inner agent loop until it needs a human again.
 * 4. Persists the thread and sends the pending request to the human,
 *    embedding the thread id in `state` so the next webhook can resume it.
 *
 * Responds 404 when the referenced thread cannot be found, 400 for an event
 * type this handler does not know, and `{status: "ok"}` otherwise.
 */
const outerLoop = async (req: Request, res: Response) => {
    console.log("outerLoop", req.body);
    const body = req.body as V1Beta3Event;
    const hl = humanlayer({
        runId: process.env.HUMANLAYER_RUN_ID || `12fa-agent`,
        contactChannel: {
            channel_id: body.event.contact_channel_id,
        } as ContactChannel // todo export this type flavor
    });

    /* get the thread or make a new one*/
    let thread: Thread | undefined;
    let threadId: string | undefined;
    switch (body.type) {
        case "conversation.created":
            thread = new Thread([{type: "conversation.created", data: body.event.user_message}]);
            break;
        case "human_contact.completed":
        case "function_call.completed":
            threadId = body.event.spec.state?.thread_id;
            if (!threadId) {
                notFound(res);
                return;
            }
            // store.get is async: without the await `thread` would be a Promise
            // and the not-found check below could never trigger.
            thread = await store.get(threadId);
            if (!thread) {
                notFound(res);
                return;
            }
            break;
    }

    if (!thread) {
        // Only reachable when the payload's `type` is not one of the known events.
        res.status(400).json({
            error: 'Bad Request',
            message: `Unsupported event type`,
            status: 400
        });
        return;
    }

    /* handle the response event */
    if (body.type === "function_call.completed" && body.event.status?.approved) {
        // run the function call and add the result to the thread
        thread = await handleNextStep(thread.lastEvent().data, thread);
    } else if (body.type === "function_call.completed" && !body.event.status?.approved) {
        // add the denial to the thread
        thread.events.push({
            type: "human_response",
            data: `user denied operation ${thread.lastEvent().data.intent} with feedback: ${body.event.status?.comment}`
        });
    } else if (body.type === "human_contact.completed") {
        // add the human response to the thread
        thread.events.push({
            type: "human_response",
            data: {
                msg: body.event.status.response,
            }
        });
    }

    /* run the inner loop */
    const newThread = await innerLoop(thread);

    // Persist before contacting the human so the thread id placed in `state`
    // is a real, already-written id (store methods are async).
    if (threadId) {
        await store.update(threadId, newThread);
    } else {
        threadId = await store.create(newThread);
    }

    // we exited the inner loop, send to human
    const lastEvent = newThread.lastEvent();
    switch (lastEvent.data.intent) {
        case "request_more_information":
        case "done_for_now": {
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId
                    }
                }
            });
            console.log(`created human contact "${lastEvent.data.message}"`);
            break;
        }
        case "other_scary_tools":  // example, add more tools here
        case "divide": {
            // remove intent from kwargs payload
            const { intent, ...kwargs } = lastEvent.data;
            await hl.createFunctionCall({
                spec: {
                    fn: intent,
                    kwargs: kwargs,
                    state: {
                        thread_id: threadId
                    }
                }
            });
            console.log("created function call", {intent, kwargs});
            break;
        }
    }

    res.json({ status: "ok" });
}

/**
 * Register the routes, start listening, and return the HTTP server.
 * The port comes from the PORT environment variable and falls back to 8000.
 */
export const startServer = () => {
    app.post('/api/v1/conversations', outerLoop)

    // Handle 404 - Not Found: anything that did not match a route above.
    const routeNotFound = (req: Request, res: Response) => {
        res.status(404).json({
            error: 'Not Found',
            message: `Route ${req.originalUrl} not found`,
            status: 404
        });
    };
    app.use(routeNotFound);

    const port = process.env.PORT || 8000;
    const onListening = () => {
        console.log(`Server is running on port ${port}`);
    };
    const server = app.listen(port, onListening);

    const onServerError = (error: Error) => {
        console.error('Server error:', error);
    };
    server.on('error', onServerError);

    return server;
}

// Only start the server if this file is being run directly;
// importing the module exposes startServer without binding a port.
if (require.main === module) {
    startServer();
}

================================================
FILE: packages/create-12-factor-agent/template/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
import { Response } from 'express';
import fs from 'fs/promises';
import path from 'path';

// Persistence contract for threads. Every method is asynchronous.
export interface ThreadStore {
    // Persist a new thread and resolve with the id assigned to it.
    create(thread: Thread): Promise<string>;
    // Load a thread by id; resolves with undefined when there is none.
    get(id: string): Promise<Thread | undefined>;
    // Overwrite the stored state of an existing thread.
    update(id: string, thread: Thread): Promise<void>;
}

// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * ThreadStore backed by files in `<cwd>/.threads`.
 *
 * Each thread is written twice: `<id>.json` (the serialized thread, used to
 * load it back) and `<id>.txt` (the LLM-facing rendering, for inspection).
 */
export class FileSystemThreadStore implements ThreadStore {
    private threadsDir: string;

    constructor() {
        this.threadsDir = path.join(process.cwd(), '.threads');
    }

    /**
     * Thread ids become file names and can arrive from request bodies, so only
     * accept simple tokens (letters, digits, `_`, `-`). This rejects path
     * separators and `..`, while still matching the UUIDs produced by create().
     */
    private isValidId(id: string): boolean {
        return typeof id === 'string' && /^[A-Za-z0-9_-]+$/.test(id);
    }

    /** Write both representations of `thread` under `id`. */
    private async write(id: string, thread: Thread): Promise<void> {
        await fs.mkdir(this.threadsDir, { recursive: true });
        const filePath = path.join(this.threadsDir, `${id}.json`);
        const txtPath = path.join(this.threadsDir, `${id}.txt`);
        await Promise.all([
            fs.writeFile(filePath, JSON.stringify(thread, null, 2)),
            fs.writeFile(txtPath, thread.serializeForLLM())
        ]);
    }

    /** Persist a new thread under a fresh random UUID and resolve with that id. */
    async create(thread: Thread): Promise<string> {
        const id = crypto.randomUUID();
        await this.write(id, thread);
        return id;
    }

    /**
     * Load a thread by id.
     * Resolves with undefined when the id is malformed, the file does not
     * exist, or the file is empty. Other read errors (e.g. permissions) are
     * rethrown rather than being reported as "not found".
     */
    async get(id: string): Promise<Thread | undefined> {
        if (!this.isValidId(id)) return undefined;
        const filePath = path.join(this.threadsDir, `${id}.json`);
        const data = await fs.readFile(filePath, 'utf8').catch((err: NodeJS.ErrnoException) => {
            if (err?.code === 'ENOENT') return null;
            throw err;
        });
        if (!data) return undefined;
        return new Thread(JSON.parse(data).events);
    }

    /**
     * Overwrite the stored state of thread `id`.
     * @throws Error when `id` is not a valid thread id
     */
    async update(id: string, thread: Thread): Promise<void> {
        if (!this.isValidId(id)) {
            throw new Error(`invalid thread id: ${JSON.stringify(id)}`);
        }
        await this.write(id, thread);
    }
}

================================================
FILE: packages/create-12-factor-agent/template/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: packages/walkthroughgen/.gitignore
================================================
.tmptest*

================================================
FILE: packages/walkthroughgen/examples/typescript/.gitignore
================================================
build/

================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/00-package-lock.json
================================================
{
    "name": "walkthroughgen",
    "version": "1.0.0",
    "lockfileVersion": 3,
    "requires": true,
    "packages": {
      "": {
        "name": "walkthroughgen",
        "version": "1.0.0",
        "license": "ISC",
        "dependencies": {
          "typescript": "^5.8.3"
        }
      }
    },
    "node_modules/typescript": {
      "version": "5.8.3",
      "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz",
      "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
      "license": "Apache-2.0",
      "bin": {
        "tsc": "bin/tsc",
        "tsserver": "bin/tsserver"
      },
      "engines": {
        "node": ">=14.17"
      }
    }
  }
  

================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/00-package.json
================================================
{
    "name": "walkthroughgen",
    "version": "1.0.0",
    "main": "index.js",
    "scripts": {
      "test": "echo \"Error: no test specified\" && exit 1"
    },
    "keywords": [],
    "author": "",
    "license": "ISC",
    "description": "",
    "dependencies": {
      "typescript": "^5.8.3"
    }
  }
  

================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/00-tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "es2016",
      "module": "commonjs",
      "esModuleInterop": true,
      "forceConsistentCasingInFileNames": true,
      "strict": true,
      "skipLibCheck": true
    },
    "exclude": ["node_modules", "dist", "**/*walkthrough/**"]
  }
  

================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/01-index.ts
================================================
// Walkthrough step 1: the smallest possible program — print a greeting and exit.
const main = () => {
  console.log("hello world");
};

// Run immediately when the file is executed.
main();

================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/02-cli.ts
================================================
// Walkthrough step 2: a minimal command dispatcher.
// Reads `<command> <name>` from the command line; only `create` is supported.
const cli = () => {
    // Skip the node binary and script path.
    const args = process.argv.slice(2);
    const command = args[0];
    // NOTE(review): `name` is undefined when no second argument is given, so this prints "Creating undefined".
    const name = args[1];
    if (command === "create") {
        console.log(`Creating ${name}`);
    } else {
        console.log("Invalid command: ", command);
        console.log("available commands: create");
    }
};

// Runs as soon as the file is loaded.
cli();


================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/02-index.ts
================================================
// Walkthrough step 2: the entry point now delegates to the CLI.
// NOTE(review): `cli` is neither imported nor declared in this file; presumably it is
// meant to come from src/cli.ts, which would then need to export it — confirm.
const main = async () => {
    return cli();
  };
  
// Log any error that escapes main.
main().catch(console.error);
  

================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough.yaml
================================================
title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"
targets:
  - markdown: "./build/walkthrough.md" # generates a walkthrough.md file
    onChange: # default behavior - on changes, show diffs and cp commands
      diff: true
      cp: true
    newFiles: # when new files are created, just show the copy command
      cat: false
      cp: true
  - final: "./build/final" # outputs the final project to the final folder
  - folders: "./build/by-section" # creates a separate working folder for each section
sections:
  - name: setup
    title: "Copy initial files"
    steps:
      - file: {src: ./walkthrough/00-package.json, dest: package.json}
      - file: {src: ./walkthrough/00-package-lock.json, dest: package-lock.json}
      - file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json}
  - name: initialize
    title: "Initialize the project"
    steps:
      - text: "initialize the project"
        command: |
          npm install
      - text: "then add index.ts"
        file: {src: ./walkthrough/01-index.ts, dest: src/index.ts}
      - text: "run it with tsx"
        command: |
          npx tsx src/index.ts
        results:
          - text: "you should see a hello world message"
            code: |
              hello world
  - name: add-cli
    title: "Add a CLI"
    steps:
      - text: "add a cli"
        file: {src: ./walkthrough/02-cli.ts, dest: src/cli.ts}
      - text: "update index.ts to use the cli"
        file: {src: ./walkthrough/02-index.ts, dest: src/index.ts}

================================================
FILE: packages/walkthroughgen/examples/walkthroughgen/walkthrough.yaml
================================================
title: "using walkthroughgen"
targets:
  - markdown: "./walkthrough.md" # generates a walkthrough.md file
    diffs: true
  - final: "./final" # outputs the final project to the final folder
  - folders: "./by-section" # creates a separate working folder for each section
init:
  - file: {src: ./walkthrough/00-package.json, dest: package.json}
  - file: {src: ./walkthrough/00-package-lock.json, dest: package-lock.json}
sections:
  - name: initialize
    title: "initialize the project"
    steps:
      - text: "initialize walkthroughgen"
        command: |
          npx wtg init my-project
          cd my-project
      - text: "this will create an empty project with a walkthrough.yaml file"
        command: |
          ls -la
          cat walkthrough.yaml
        results:
          - text: "you should see a walkthrough.yaml file"
            code: |
              # walkthrough.yaml
              title: "hello world"
              sections:
                - name: initialize
                  title: "initialize the project"
                  steps:
                    - text: "initialize the project"
                      command: |
                        # your code here
  - name: build
    title: "build the project"
    steps:
      - text: "build the project"
        command: |
          npx wtg build
      - text: "this will create a walkthrough.md file"
        command: |
          cat walkthrough.md
        results:


================================================
FILE: packages/walkthroughgen/jest.config.js
================================================
// Jest configuration for walkthroughgen: TypeScript tests run through ts-jest in a Node environment.
module.exports = {
  preset: 'ts-jest',
  testEnvironment: 'node',
  // Any .ts file under a test/ directory is treated as a test file...
  testMatch: ['**/test/**/*.ts'],
  // ...except dependencies and the shared helpers in test/utils.
  testPathIgnorePatterns: ['/node_modules/', '/test/utils/'],
  transform: {
    '^.+\\.ts$': 'ts-jest',
  },
}; 

================================================
FILE: packages/walkthroughgen/package.json
================================================
{
  "name": "walkthroughgen",
  "version": "1.0.0",
  "main": "index.js",
  "scripts": {
    "test": "jest",
    "test:watch": "jest --watch"
  },
  "keywords": [],
  "author": "",
  "license": "ISC",
  "description": "",
  "dependencies": {
    "@boundaryml/baml": "^0.85.0",
    "@types/diff": "^7.0.2",
    "@types/js-yaml": "^4.0.9",
    "diff": "^7.0.0",
    "js-yaml": "^4.1.0",
    "typescript": "^5.8.3"
  },
  "devDependencies": {
    "@types/jest": "^29.5.14",
    "jest": "^29.7.0",
    "ts-jest": "^29.3.2"
  }
}


================================================
FILE: packages/walkthroughgen/prompt.md
================================================
Walkthroughgen is a tool for creating walkthroughs, tutorials, readmes, and documentation.

## Usage

You create a walkthrough by writing a simple yaml file that describes the walkthrough. In the file, you reference the incremental files that should exist at each step of the walkthrough

```
├── walkthrough
│   ├── 00-package-lock.json
│   ├── 00-package.json
│   ├── 01-index.ts
│   ├── 02-cli.ts
│   └── 02-index.ts
└── walkthrough.yaml
```

Your walkthrough.yaml file might look like this (runnable example in [examples/typescript](./examples/typescript)):

```yaml
title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"
targets:
  - markdown: "./build/walkthrough.md" # generates a walkthrough.md file
    onChange: # default behavior - on changes, show diffs and cp commands
      diff: true
      cp: true
    newFiles: # when new files are created, just show the copy command
      cat: false
      cp: true
  - final: "./build/final" # outputs the final project to the final folder
  - folders: "./build/by-section" # creates a separate working folder for each section
sections:
  - name: setup
    title: "Copy initial files"
    steps:
      - file: {src: ./walkthrough/00-package.json, dest: package.json}
      - file: {src: ./walkthrough/00-package-lock.json, dest: package-lock.json}
      - file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json}
  - name: initialize
    title: "Initialize the project"
    steps:
      - text: "initialize the project"
        command: |
          npm install
      - text: "then add index.ts"
        file: {src: ./walkthrough/01-index.ts, dest: src/index.ts}
      - text: "run it with tsx"
        command: |
          npx tsx src/index.ts
        results:
          - text: "you should see a hello world message"
            code: |
              hello world
  - name: add-cli
    title: "Add a CLI"
    steps:
      - text: "add a cli"
        file: {src: ./walkthrough/02-cli.ts, dest: src/cli.ts}
      - text: "update index.ts to use the cli"
        file: {src: ./walkthrough/02-index.ts, dest: src/index.ts}
```

Build the project with:

```
npm i -g wtg
wtg build
```

based on your targets, this would create the following files

```
├── walkthrough
│   ├── 00-package-lock.json
│   ├── 00-package.json
│   ├── 01-index.ts
│   ├── 02-cli.ts
│   └── 02-index.ts
├── build
│   ├── by-section
│   │   ├── 00-initialize # only contains the files in `init`
│   │   │   ├── readme.md # contains steps for this section
│   │   │   ├── package.json
│   │   │   ├── package-lock.json
│   │   │   └── tsconfig.json
│   │   └── 01-add-cli # contains the files up to the START of section 1
│   │       ├── readme.md # contains steps for this section
│   │       ├── package.json
│   │       ├── package-lock.json
│   │       ├── tsconfig.json
│   │       └── src
│   │           └── index.ts
│   ├── final
│   │   ├── package.json
│   │   ├── package-lock.json
│   │   ├── tsconfig.json
│   │   └── src
│   │       ├── cli.ts
│   │       └── index.ts
│   └── walkthrough.md
```

and your walkthrough.md file will look like:

```markdown
# Setting up a typescript cli

this is a walkthrough for setting up a typescript cli

## Copy initial files

  cp walkthrough/00-package.json package.json
  cp walkthrough/00-package-lock.json package-lock.json
  cp walkthrough/00-tsconfig.json tsconfig.json

## Initialize the project

initialize the project

     npm install

then add index.ts


    cp walkthrough/01-index.ts src/index.ts

and run it with tsx

    npx tsx src/index.ts

you should see a hello world message

    hello world

## Add a CLI

add a cli

    ```
    ```
 
    cp walkthrough/02-cli.ts src/cli.ts

update index.ts to use the cli

    ```diff
      const main = async () => {
      +    return cli();
      };
        
      main();
    ```

    or just:

    cp walkthrough/02-index.ts src/index.ts

```

## Features

### Targets

- `markdown`: generates a single markdown file
- `folders`: creates a set of folders, one for each section
- `final`: outputs the final project to the given folder

### Init


### Sections

### Steps

#### Step 


## Walkthrough.yaml for walkthroughgen

## Implementation Plan

- [ ] implement core walkthroughgen CLI - `wtg build` # defaults to walkthrough.yaml in current directory
- Scope 1: generating walkthrough.md
  - [ ] create end-to-end test for a simple walkthrough file, just a single yaml file with no sections
  - [ ] create end-to-end test for a walkthrough file with a single section
  - [ ] test generation of diffs and cp commands
- Scope 2: generating final/ project build
  - [ ] create end-to-end test for a walkthrough file with a final target
- Scope 3: generating by-section project builds with readmes
  - [ ] create end-to-end test for a walkthrough file with a by-section target

================================================
FILE: packages/walkthroughgen/readme.md
================================================
# Walkthroughgen

Walkthroughgen is a tool for creating walkthroughs, tutorials, readmes, and documentation. It helps you maintain step-by-step guides by generating markdown and working directories from a simple YAML configuration.

## Features

- 📝 **Markdown Generation**: Create beautiful markdown files with diffs, code blocks, and collapsible sections
- 📁 **Working Directories**: Generate separate directories for each section of your walkthrough
- 🔄 **Incremental Changes**: Track and display changes between steps
- 🎯 **Multiple Targets**: Output to markdown, section folders, and final project state
- 📦 **File Management**: Copy files, create directories, and run commands
- 🔍 **Rich Diffs**: Show meaningful diffs between file versions
- 📚 **Section READMEs**: Generate per-section documentation

## Installation

```bash
npm install -g walkthroughgen
```

## Quick Start

1. Create a `walkthrough.yaml` file:

```yaml
title: "My Tutorial"
text: "A step-by-step guide"
targets:
  - markdown: "./walkthrough.md"
    onChange:
      diff: true
      cp: true
  - folders:
      path: "./by-section"
      final:
        dirName: "final"
sections:
  - name: setup
    title: "Initial Setup"
    steps:
      - file: {src: ./files/package.json, dest: package.json}
      - command: "npm install"
```

2. Run the generator:

```bash
walkthroughgen generate walkthrough.yaml
```

## Directory Structure

A typical walkthrough project looks like this:

```
my-tutorial/
├── walkthrough/          # Source files for each step
│   ├── 00-package.json
│   ├── 01-index.ts
│   └── 02-config.ts
├── walkthrough.yaml     # Walkthrough configuration
└── build/              # Generated output
    ├── by-section/    # Section-by-section working directories
    │   ├── 00-setup/
    │   └── 01-config/
    ├── final/         # Final project state
    └── walkthrough.md # Generated markdown
```

## Walkthrough.yaml Configuration

### Top-Level Fields

- `title`: Title of the walkthrough
- `text`: Introduction text
- `targets`: Output configuration
- `sections`: Tutorial sections

### Targets

#### Markdown Target

```yaml
targets:
  - markdown: "./output.md"
    onChange:
      diff: true  # Show diffs for changed files
      cp: true    # Show cp commands
    newFiles:
      cat: false  # Don't show file contents
      cp: true    # Show cp commands
```

#### Folders Target

```yaml
targets:
  - folders:
      path: "./by-section"        # Base path for section folders
      skip: ["cleanup"]          # Sections to skip
      final:
        dirName: "final"        # Name for final state directory
```

### Sections

Each section represents a logical step in your tutorial:

```yaml
sections:
  - name: setup              # Used for folder naming and skip array
    title: "Initial Setup"   # Display title
    text: "Setup steps..."   # Section description
    steps:
      # ... steps ...
```

### Steps

Steps define the actions to take:

#### File Copy
```yaml
steps:
  - text: "Copy package.json"
    file:
      src: ./files/package.json
      dest: package.json
```

#### Directory Creation
```yaml
steps:
  - text: "Create src directory"
    dir:
      create: true
      path: src
```

#### Command Execution
```yaml
steps:
  - text: "Install dependencies"
    command: "npm install"
    incremental: true  # run when building up folders target
```

#### Command Results
```yaml
steps:
  - command: "npm run test"
    results:
      - text: "You should see:"
        code: |
          All tests passed!
```

## Generated Output

### Markdown Features

- **File Diffs**: Shows changes between versions
- **Copy Commands**: Easy-to-follow file copy instructions
- **Collapsible Sections**: Hide/show file contents
- **Code Highlighting**: Syntax highlighting for various languages

Example markdown output:

~~~markdown
# Initial Setup

Copy the package.json:

    cp ./files/package.json package.json

<details>
<summary>show file</summary>

```json
{
  "name": "my-project",
  "version": "1.0.0"
}
```
</details>

Install dependencies:

    npm install

You should see:

    added 123 packages
~~~

### Section Folders

The `folders` target creates:

1. A directory for each section
2. Section-specific README.md files
3. Working project state
4. Optional final state directory

## Examples

See the [examples](./examples) directory for complete examples:

- [TypeScript CLI](./examples/typescript): Basic TypeScript project setup
- [Walkthroughgen](./examples/walkthroughgen): Self-documenting example

## Tips

1. Use meaningful section names - they become folder names
2. Include context in step text
3. Use `incremental: true` for commands that modify state
4. Leverage diffs to highlight important changes
5. Use the `skip` array to exclude setup/cleanup sections from output

## Contributing

Contributions welcome! Please read [CONTRIBUTING.md](./CONTRIBUTING.md) for details.

## License

This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details.


================================================
FILE: packages/walkthroughgen/src/cli.ts
================================================
import * as fs from 'fs';
import * as path from 'path';
import * as yaml from 'js-yaml';
import * as Diff from 'diff';
import { execSync } from 'child_process';

/**
 * One section of a walkthrough as written in the YAML file: a heading,
 * optional introductory text, and an ordered list of steps.
 */
interface Section {
  title: string;
  text?: string;
  name?: string; // Optional, used for folder naming (falls back to a slug of `title`)
  steps?: Array<{
    text?: string; // Optional prose emitted before the step's actions
    file?: { src: string; dest: string }; // `src` is resolved against the YAML file's directory; `dest` is relative to the working directory
    command?: string; // Always rendered in the markdown output
    incremental?: boolean; // Only when exactly `true` is the command executed, and only while building the folders target
    dir?: { create: boolean; path: string }; // When `create` is true, `path` is created inside the working directory
    results?: Array<{ text: string; code: string }>; // Expected output: `text` as prose, `code` as an indented block
  }>;
}

/**
 * Top-level shape of a walkthrough YAML document.
 *
 * `title` and `text` are required (the CLI rejects documents where either is
 * not a string). Every entry in `targets` is inspected for a `folders`
 * configuration, but `markdown`, `onChange` and `newFiles` are only read from
 * the first entry.
 */
interface WalkthroughData {
  title: string;
  text: string;
  sections?: Section[];
  targets?: Array<{
    markdown?: string; // Output path relative to the YAML file; defaults to "walkthrough.md"
    folders?: {
      path: string; // Path for section folders, e.g. "./build/by-section"
      skip?: string[]; // Section names to skip folder creation for
      final?: {
        dirName: string; // Name of the final directory containing all steps' results
      };
    };
    // Rendering of a file step whose destination was already written by an earlier step:
    // `diff: true` emits a diff block; the `cp` line is shown unless `cp` is exactly `false`.
    onChange?: { diff?: boolean; cp?: boolean };
    // Rendering of a file step for a destination seen for the first time:
    // the `cp` line and file preview are shown unless `cp` is exactly `false`.
    // NOTE(review): `cat` is not read anywhere in this file.
    newFiles?: { cat?: boolean; cp?: boolean };
  }>;
}

/**
 * Returns the base name used when naming a section's folder.
 *
 * An explicit, non-empty `name` wins. Otherwise the title is lower-cased and
 * every run of characters outside `a-z0-9` is collapsed into a single hyphen.
 */
function getSectionBaseName(section: Section): string {
  if (section.name) {
    return section.name;
  }
  const loweredTitle = section.title.toLowerCase();
  return loweredTitle.replace(/[^a-z0-9]+/g, '-');
}

/**
 * Copies a walkthrough source file into a section folder, keeping the same
 * path it has relative to the project root.
 *
 * @param srcFile     Source path, resolved against `projectRoot`.
 * @param projectRoot Directory the source path is relative to.
 * @param sectionDir  Section folder that receives the copy.
 */
function copySourceFiles(srcFile: string, projectRoot: string, sectionDir: string): void {
  const absoluteSource = path.resolve(projectRoot, srcFile);
  // Re-derive the project-relative path so "./a/b" and "a/b" land in the same place.
  const target = path.join(sectionDir, path.relative(projectRoot, absoluteSource));
  const targetParent = path.dirname(target);
  fs.mkdirSync(targetParent, { recursive: true });
  fs.copyFileSync(absoluteSource, target);
}

/**
 * Copies one file to another location inside the same section directory.
 * Both paths are relative to `sectionDir`; missing parent directories of the
 * destination are created.
 */
function copyWorkingFile(srcFile: string, destFile: string, sectionDir: string): void {
  const [from, to] = [srcFile, destFile].map((relative) => path.join(sectionDir, relative));
  fs.mkdirSync(path.dirname(to), { recursive: true });
  fs.copyFileSync(from, to);
}

/**
 * Recursively copies the contents of `src` into `dest`.
 *
 * Does nothing when `src` does not exist. `dest` is created if needed, and
 * existing files at the destination are overwritten.
 */
function copyDirectory(src: string, dest: string): void {
  if (fs.existsSync(src)) {
    fs.mkdirSync(dest, { recursive: true });

    fs.readdirSync(src, { withFileTypes: true }).forEach((child) => {
      const from = path.join(src, child.name);
      const to = path.join(dest, child.name);

      if (!child.isDirectory()) {
        fs.copyFileSync(from, to);
        return;
      }
      copyDirectory(from, to);
    });
  }
}

/**
 * Applies a section's steps to the cumulative working directory.
 *
 * For each step, in order:
 *  - `dir.create`  creates the directory inside `workingDir`;
 *  - `file`        copies `file.src` (resolved against `projectRoot`) to
 *                  `file.dest` inside `workingDir`, and, when `sectionPath` is
 *                  given, also copies the source file into that section folder;
 *  - `command`     is executed with `workingDir` as cwd, but only when
 *                  `incremental` is exactly `true`.
 *
 * Failures of a file copy or a command are logged and do not stop the
 * remaining steps, so one bad step cannot abort the whole build.
 *
 * @param steps       Steps of the section; nothing happens when undefined.
 * @param projectRoot Directory that `file.src` paths are relative to.
 * @param workingDir  Directory that accumulates the project state.
 * @param sectionPath Optional section folder that also receives source files.
 */
function applyStepsToWorkingDir(
  steps: Section['steps'],
  projectRoot: string,
  workingDir: string,
  sectionPath: string | null = null // If provided, also copy source files to section's walkthrough/
): void {
  if (!steps) return;

  for (const step of steps) {
    // Handle dir creation
    if (step.dir?.create) {
      const dirToCreate = path.join(workingDir, step.dir.path);
      fs.mkdirSync(dirToCreate, { recursive: true });
    }

    // Handle file copy
    if (step.file?.src) {
      try {
        // Copy to working directory
        const srcAbsPath = path.resolve(projectRoot, step.file.src);
        const destPath = path.join(workingDir, step.file.dest);
        fs.mkdirSync(path.dirname(destPath), { recursive: true });
        fs.copyFileSync(srcAbsPath, destPath);

        // If a section path is provided, also copy source file to section's walkthrough/
        if (sectionPath) {
          copySourceFiles(step.file.src, projectRoot, sectionPath);
        }
      } catch (error) {
        // Log and continue: the markdown generators also only warn about an
        // unreadable source file, and a crash here would prevent them from running.
        console.error(`Error copying file "${step.file.src}" into ${workingDir}:`, error);
      }
    }

    // Handle command execution - only run if incremental is explicitly true
    if (step.command && step.incremental === true) {
      try {
        execSync(step.command, { cwd: workingDir, stdio: 'inherit' });
      } catch (error) {
        console.error(`Error executing incremental command "${step.command}" in ${workingDir}:`, error);
        // Log error but continue, matching behavior of file copy errors
      }
    }
  }
}

/**
 * Renders a section as plain markdown without reading any files.
 *
 * Output is a level-1 heading with the section title, the optional section
 * text, and then for every step (in this order): its text, a `mkdir -p` line
 * for `dir.create`, a `cp` line for `file`, the command, and the expected
 * results. Commands and result code are emitted as four-space-indented code
 * blocks; every line of a multi-line command is indented so the whole command
 * stays inside the block.
 *
 * @param section Section to render.
 * @returns The markdown text, ending with a blank line.
 */
function generateSectionMarkdown(section: Section): string {
  // Trim the snippet and indent each of its lines as a markdown code block.
  const asCodeBlock = (snippet: string): string =>
    snippet.trim().split('\n').map(line => `    ${line}`).join('\n') + '\n\n';

  let markdown = `# ${section.title}\n\n`;
  if (section.text) {
    markdown += `${section.text}\n\n`;
  }
  if (section.steps) {
    for (const step of section.steps) {
      if (step.text) {
        markdown += `${step.text}\n\n`;
      }
      if (step.dir?.create) {
        markdown += `    mkdir -p ${step.dir.path}\n\n`;
      }
      if (step.file) {
        markdown += `    cp ${step.file.src} ${step.file.dest}\n\n`;
      }
      if (step.command) {
        markdown += asCodeBlock(step.command);
      }
      if (step.results) {
        for (const result of step.results) {
          markdown += `${result.text}\n\n`;
          if (result.code) {
            markdown += asCodeBlock(result.code);
          }
        }
      }
    }
  }
  return markdown;
}

/**
 * Builds a compact fenced `diff` block describing how a file changed.
 *
 * Line endings of both inputs are normalised to `\n` first. The unified patch
 * produced by `Diff.createPatch` (two lines of context) is then reduced to its
 * added, removed and context lines: the patch preamble, hunk headers and
 * "\ No newline at end of file" markers are dropped, and a removed line that is
 * followed (ignoring blank, marker and context lines) by an added line with
 * the same trimmed text is treated as unchanged and skipped together with the
 * lines in between.
 *
 * @param filePath   Path printed as the first line inside the fenced block.
 * @param oldContent Previous file content.
 * @param newContent New file content.
 * @returns The fenced diff block followed by a blank line, or `null` when no
 *          effective change remains.
 */
function formatMinimalDiff(filePath: string, oldContent: string, newContent: string): string | null {
  // Normalize line endings in both inputs
  const normalize = (str: string) => str.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  const normalizedOld = normalize(oldContent);
  const normalizedNew = normalize(newContent);

  if (normalizedOld === normalizedNew) {
    return null;
  }

  // Using context: 2 to show some surrounding lines
  const patch = Diff.createPatch(filePath, normalizedOld, normalizedNew, '', '', { context: 2 });
  const patchLines = patch.split('\n');
  const effectiveChangeLines: string[] = [];

  // The preamble (Index:/====/---/+++) only exists before the first hunk.
  // Inside a hunk a line such as "--- rule" or "+++ x" is real content (a
  // removed "-- rule" / added "++ x" line), so it must not be filtered by prefix.
  let insideHunks = false;

  let i = 0;
  while (i < patchLines.length) {
    const line = patchLines[i];

    // Hunk metadata: never part of the output. Content lines always start
    // with ' ', '+', '-' or '\', so an '@@' prefix can only be a hunk header.
    if (line.startsWith('@@')) {
      insideHunks = true;
      i++;
      continue;
    }

    // Skip the standard patch preamble
    if (!insideHunks) {
      i++;
      continue;
    }

    // Check for identical remove/add pairs (which means no effective change for these two lines)
    if (line.startsWith('-')) {
      let nextDiffLineIndex = i + 1;
      // Skip empty lines AND "No newline" markers AND context lines to find the next actual diff line
      while (nextDiffLineIndex < patchLines.length &&
             (patchLines[nextDiffLineIndex].trim() === '' ||
              patchLines[nextDiffLineIndex].startsWith('\\') ||
              patchLines[nextDiffLineIndex].startsWith(' '))) {
        nextDiffLineIndex++;
      }

      if (nextDiffLineIndex < patchLines.length && patchLines[nextDiffLineIndex].startsWith('+')) {
        const removedText = line.substring(1).trim();
        const addedText = patchLines[nextDiffLineIndex].substring(1).trim();
        if (removedText === addedText) {
          // Advance i past the current line, any skipped lines, and the matched added line
          i = nextDiffLineIndex + 1;
          continue;
        }
      }
    }

    // If the line starts with +, -, or space (context), it's a line to be included
    if (line.startsWith('+') || line.startsWith('-') || line.startsWith(' ')) {
      effectiveChangeLines.push(line);
    }

    i++;
  }

  if (effectiveChangeLines.length > 0) {
    return `\`\`\`diff\n${filePath}\n${effectiveChangeLines.join('\n')}\n\`\`\`\n\n`;
  }
  return null;
}

/**
 * Renders the README for a single section folder.
 *
 * The files already present in `sectionWorkingDir` are loaded as the "before"
 * state. Each file step is then rendered against that state: a destination
 * that already exists is treated as a change (optionally shown as a diff, per
 * the first target's `onChange` settings), while a new destination is shown
 * as a `cp` line plus a collapsible preview (per `newFiles.cp`). A step whose
 * source file cannot be read is reported with a warning and the rest of that
 * step is skipped.
 *
 * @param section            Section to render.
 * @param projectRoot        Directory that `file.src` paths are relative to.
 * @param sectionWorkingDir  Folder holding the state before this section.
 * @param walkthroughTargets Targets from the YAML; only the first is consulted.
 * @returns The markdown text for the section README.
 */
function generateRichSectionMarkdown(
  section: Section,
  projectRoot: string,
  sectionWorkingDir: string,
  walkthroughTargets: WalkthroughData['targets']
): string {
  const out: string[] = [`# ${section.title}\n\n`];
  if (section.text) {
    out.push(`${section.text}\n\n`);
  }

  // Trim a snippet and indent each line as a markdown code block.
  const indentLines = (snippet: string): string =>
    snippet.trim().split('\n').map(line => `    ${line}`).join('\n') + '\n\n';

  // Emit the `cp` line followed by a collapsible preview of the file content.
  const emitCopyWithPreview = (src: string, dest: string, content: string): void => {
    out.push(`    cp ${src} ${dest}\n\n`);
    const extension = path.extname(src).substring(1);
    // .baml files are fenced as rust
    const fenceLang = extension === 'baml' ? 'rust' : extension;
    out.push(`<details>\n<summary>show file</summary>\n\n`);
    out.push(`\`\`\`${fenceLang}\n`);
    out.push(`// ${src}\n`);
    out.push(`${content.trim()}\n`);
    out.push(`\`\`\`\n\n`);
    out.push(`</details>\n\n`);
  };

  // Snapshot of the files that exist before this section's steps run,
  // keyed by path relative to the section folder.
  const knownFiles = new Map<string, string>();
  const snapshot = (dir: string): void => {
    for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
      const absolute = path.join(dir, entry.name);
      const key = path.relative(sectionWorkingDir, absolute);
      if (entry.isDirectory()) {
        snapshot(absolute);
        continue;
      }
      try {
        knownFiles.set(key, fs.readFileSync(absolute, 'utf8'));
      } catch (error) {
        console.warn(`Warning: Could not read file ${absolute} for section README state`);
      }
    }
  };
  if (fs.existsSync(sectionWorkingDir)) {
    snapshot(sectionWorkingDir);
  }

  const primaryTarget = walkthroughTargets?.[0];

  for (const step of section.steps ? section.steps : []) {
    if (step.text) {
      out.push(`${step.text}\n\n`);
    }

    if (step.dir?.create) {
      out.push(`    mkdir -p ${step.dir.path}\n\n`);
    }

    if (step.file) {
      const sourcePath = path.resolve(projectRoot, step.file.src);
      const stateKey = path.normalize(step.file.dest);

      let incoming: string;
      try {
        incoming = fs.readFileSync(sourcePath, 'utf8');
      } catch (error: any) {
        console.warn(`Warning: Could not read source file ${sourcePath} for step: ${step.text || 'Unnamed step'}`);
        continue;
      }

      if (knownFiles.has(stateKey)) {
        // Destination already exists: this step changes/overwrites it.
        const previous = knownFiles.get(stateKey)!;
        let diffShown = false;

        if (primaryTarget?.onChange?.diff === true && previous !== incoming) {
          const renderedDiff = formatMinimalDiff(stateKey, previous, incoming);
          if (renderedDiff) {
            out.push(renderedDiff);
            diffShown = true;
          }
        }

        if (primaryTarget?.onChange?.cp !== false) {
          if (diffShown) {
            // The diff is the primary instruction; the cp is offered as a shortcut.
            out.push(`<details>\n<summary>skip this step</summary>\n\n`);
            out.push(`    cp ${step.file.src} ${step.file.dest}\n\n`);
            out.push(`</details>\n\n`);
          } else {
            emitCopyWithPreview(step.file.src, step.file.dest, incoming);
          }
        }
      } else if (primaryTarget?.newFiles?.cp !== false) {
        // Destination seen for the first time.
        emitCopyWithPreview(step.file.src, step.file.dest, incoming);
      }

      knownFiles.set(stateKey, incoming);
    }

    if (step.command) {
      out.push(indentLines(step.command));
    }

    if (step.results) {
      for (const result of step.results) {
        out.push(`${result.text}\n\n`);
        if (result.code) {
          out.push(indentLines(result.code));
        }
      }
    }
  }

  return out.join('');
}

/**
 * Command-line entry point for walkthroughgen.
 *
 * Supported invocations:
 *  - `--help` / `-h` anywhere in `argv`: prints usage and returns.
 *  - `generate <yaml-file>`: reads and parses the YAML walkthrough, then
 *      1. for every target that has `folders.path`, creates one numbered
 *         folder per non-skipped section (containing the project state before
 *         that section plus a README), and optionally a final folder with the
 *         state after all sections;
 *      2. writes the combined markdown walkthrough to the first target's
 *         `markdown` path, or to `walkthrough.md` next to the YAML file.
 *  - anything else: prints an "unknown command" hint.
 *
 * Problems reading, parsing or validating the YAML, and failures writing the
 * markdown file, are reported on the console and the function returns without
 * throwing. All relative paths are resolved against the YAML file's directory.
 *
 * @param argv Command-line arguments without the node/script prefix.
 */
export const cli = (argv: string[]) => {
  if (argv.includes("--help") || argv.includes("-h")) {
    console.log(`
USAGE:
    walkthroughgen generate <yaml-file> [options]

OPTIONS:
    --help, -h    Show help
    generate <yaml-file>    Generate markdown from YAML file
        `);
    return;
  }

  if (argv[0] === "generate") {
    if (!argv[1]) {
      console.error("Error: YAML file path is required for 'generate' command.");
      console.log("Usage: walkthroughgen generate <yaml-file>");
      return;
    }

    const yamlPath = argv[1];
    let yamlContent;
    try {
      yamlContent = fs.readFileSync(yamlPath, 'utf8');
    } catch (error: any) {
      console.error(`Error: Could not read YAML file at '${yamlPath}'.`);
      console.error(error.message);
      return;
    }

    let data: WalkthroughData;
    try {
      // NOTE(review): with js-yaml < 4, `load` can instantiate non-plain JS types;
      // confirm the installed version is >= 4 (safe default schema) before feeding
      // untrusted YAML through this path.
      data = yaml.load(yamlContent) as WalkthroughData;
    } catch (error: any) {
      console.error(`Error: Could not parse YAML content from '${yamlPath}'.`);
      console.error(error.message);
      return;
    }

    if (!data || typeof data.title !== 'string' || typeof data.text !== 'string') {
      console.error(`Error: Invalid YAML structure in '${yamlPath}'. Missing required 'title' or 'text' fields.`);
      return;
    }

    // Track virtual file state for diff generation
    const projectRoot = path.dirname(yamlPath);
    const virtualFileState = new Map<string, string>();

    // Process folders target first
    if (data.targets) {
      for (const target of data.targets) {
        // Ensure target.folders is an object with a path property
        if (target.folders && typeof target.folders === 'object' && target.folders.path) {
          const currentFoldersTarget = target.folders; // Assign to a new const for type narrowing
          const foldersBasePath = path.join(path.dirname(yamlPath), currentFoldersTarget.path);
          console.log('Creating folders base path:', foldersBasePath);
          fs.mkdirSync(foldersBasePath, { recursive: true });

          // Create a temporary working directory to build up state
          const workingDirName = `.tmp-working-${Date.now()}`;
          const workingDir = path.join(foldersBasePath, workingDirName);
          console.log('Creating working directory:', workingDir);
          fs.mkdirSync(workingDir, { recursive: true });

          try {
            // Create section folders and build up working state
            if (data.sections) {
              let visibleSectionIndex = 0; // Counter for non-skipped sections
              data.sections.forEach((section, originalIndex) => {
                const baseName = getSectionBaseName(section);

                // For logging, use original index to be clear about which section from YAML it is
                const logSectionIdentifier = `${String(originalIndex).padStart(2, '0')}-${baseName}`;
                console.log('Processing section:', logSectionIdentifier, 'with name:', section.name);

                const shouldSkip = currentFoldersTarget.skip?.includes(section.name || '');

                let sectionPathForApplySteps: string | null = null;

                if (!shouldSkip) {
                  // Use visibleSectionIndex for the actual folder name
                  const sectionFolderName = `${String(visibleSectionIndex).padStart(2, '0')}-${baseName}`;
                  const sectionPath = path.join(foldersBasePath, sectionFolderName);
                  console.log('Creating section folder:', sectionPath);
                  fs.mkdirSync(sectionPath, { recursive: true });

                  // Copy current working state to section folder
                  if (fs.existsSync(workingDir) && fs.readdirSync(workingDir).length > 0) {
                    copyDirectory(workingDir, sectionPath);
                  }

                  // Generate and write section README
                  const sectionMarkdown = generateRichSectionMarkdown(section, projectRoot, sectionPath, data.targets);
                  fs.writeFileSync(path.join(sectionPath, 'README.md'), sectionMarkdown);

                  sectionPathForApplySteps = sectionPath;
                  visibleSectionIndex++; // Increment only for sections that get a folder
                }

                // Apply steps to working directory (skipped sections still contribute state)
                applyStepsToWorkingDir(section.steps, projectRoot, workingDir, sectionPathForApplySteps);
              });

              // Create final directory if specified
              if (currentFoldersTarget.final?.dirName) {
                const finalDirPath = path.join(foldersBasePath, currentFoldersTarget.final.dirName);
                fs.mkdirSync(finalDirPath, { recursive: true });
                copyDirectory(workingDir, finalDirPath);

                // Optional: Generate cumulative README for final directory
                const finalReadme = data.sections
                  .filter(s => !currentFoldersTarget.skip?.includes(s.name || ''))
                  .map(s => generateSectionMarkdown(s))
                  .join('\n');
                fs.writeFileSync(path.join(finalDirPath, 'README.md'), finalReadme);
              }
            }
          } finally {
            // Clean up working directory
            if (fs.existsSync(workingDir)) {
              fs.rmSync(workingDir, { recursive: true, force: true });
            }
          }
        }
      }
    }

    let markdown = `# ${data.title}\n\n${data.text}\n\n`;

    if (data.sections) {
      for (const section of data.sections) {
        markdown += `## ${section.title}\n\n`;
        if (section.text) {
          markdown += `${section.text}\n\n`;
        }
        if (section.steps) {
          for (const step of section.steps) {
            if (step.text) { // Only add step.text if it exists
              markdown += `${step.text}\n\n`;
            }
            if (step.file) {
              const srcAbsolutePath = path.resolve(projectRoot, step.file.src);
              const destRelativePath = path.normalize(step.file.dest);

              let newContent: string;
              try {
                newContent = fs.readFileSync(srcAbsolutePath, 'utf8');
              } catch (error: any) {
                console.warn(`Warning: Could not read source file ${srcAbsolutePath} for step: ${step.text || 'Unnamed step'}`);
                continue;
              }

              const isExistingVirtualFile = virtualFileState.has(destRelativePath);
              const oldContent = isExistingVirtualFile ? virtualFileState.get(destRelativePath)! : '';

              if (isExistingVirtualFile) {
                // File is being changed/overwritten
                const shouldDiff = data.targets?.[0]?.onChange?.diff === true;
                let diffPrintedThisStep = false;

                if (shouldDiff && oldContent !== newContent) {
                  const diffOutput = formatMinimalDiff(destRelativePath, oldContent, newContent);
                  if (diffOutput) {
                    markdown += diffOutput;
                    diffPrintedThisStep = true;
                  }
                }

                const showCp = data.targets?.[0]?.onChange?.cp !== false;
                if (showCp) {
                  const cpCommand = `cp ${step.file.src} ${step.file.dest}`;
                  if (diffPrintedThisStep) {
                    markdown += `<details>\n<summary>skip this step</summary>\n\n`;
                    markdown += `    ${cpCommand}\n\n`;
                    markdown += `</details>\n\n`;
                  } else {
                    markdown += `    ${cpCommand}\n\n`;

                    // Add "show file" details block
                    let lang = path.extname(step.file.src).substring(1);
                    if (lang === 'baml') {
                      lang = 'rust';
                    }
                    markdown += `<details>\n<summary>show file</summary>\n\n`;
                    markdown += `\`\`\`${lang}\n`;
                    markdown += `// ${step.file.src}\n`;
                    markdown += `${newContent.trim()}\n`;
                    markdown += `\`\`\`\n\n`;
                    markdown += `</details>\n\n`;
                  }
                }
              } else {
                // New file
                const showCpForNew = data.targets?.[0]?.newFiles?.cp !== false;
                if (showCpForNew) {
                  const cpCommand = `cp ${step.file.src} ${step.file.dest}`;
                  markdown += `    ${cpCommand}\n\n`;

                  // Add "show file" details block
                  let lang = path.extname(step.file.src).substring(1);
                  if (lang === 'baml') {
                    lang = 'rust';
                  }
                  markdown += `<details>\n<summary>show file</summary>\n\n`;
                  markdown += `\`\`\`${lang}\n`;
                  markdown += `// ${step.file.src}\n`;
                  markdown += `${newContent.trim()}\n`;
                  markdown += `\`\`\`\n\n`;
                  markdown += `</details>\n\n`;
                }
              }

              virtualFileState.set(destRelativePath, newContent);
            }
            if (step.command) { // Always show commands in markdown
              // Indent every line: a multi-line command must stay inside the
              // indented code block, not just its first line.
              markdown += step.command.trim().split('\n').map(line => `    ${line}`).join('\n') + '\n\n';
            }
            if (step.results) {
              for (const result of step.results) {
                markdown += `${result.text}\n\n`;
                if (result.code) {
                  markdown += result.code.trim().split('\n').map(line => `    ${line}`).join('\n') + '\n\n';
                }
              }
            }
          }
        }
      }
    }

    // Only the first target's `markdown` path is honoured for the combined output
    const outputPath = data.targets?.[0]?.markdown
      ? path.join(path.dirname(yamlPath), data.targets[0].markdown)
      : path.join(path.dirname(yamlPath), 'walkthrough.md');

    try {
      fs.mkdirSync(path.dirname(outputPath), { recursive: true });
      fs.writeFileSync(outputPath, markdown);
      console.log(`Successfully generated walkthrough to ${outputPath}`);
    } catch (error: any) {
      console.error(`Error: Could not write markdown file to '${outputPath}'.`);
      console.error(error.message);
      return;
    }
    return;
  }

  console.log("Unknown command. Available commands: generate. Use --help for more info.");
};


================================================
FILE: packages/walkthroughgen/src/index.ts
================================================
import { cli } from "./cli";

/**
 * Process entry point: forwards the user-supplied arguments (everything after
 * the node binary and script path) to the CLI.
 */
async function main(): Promise<void> {
  const userArgs = process.argv.slice(2);
  cli(userArgs);
}

// Any error thrown by the CLI surfaces as a rejected promise and is printed.
main().catch(console.error);


================================================
FILE: packages/walkthroughgen/test/e2e/test-e2e.ts
================================================
import * as fs from 'fs';
import * as path from 'path';
import { cli } from "../../src/cli";
import { withMockedConsole } from "../utils/console-mock";
import { withTmpDir } from "../utils/temp-dir";

// Argument handling and error reporting of the CLI, without any real generation.
describe("CLI basics", () => {
  // Runs the CLI with the given arguments and returns the captured console output.
  const run = (args: string[]) =>
    withMockedConsole(() => {
      cli(args);
    });

  // Both spellings of the help flag must print the same usage text.
  for (const flag of ["--help", "-h"]) {
    it(`should handle ${flag} flag`, () => {
      const printed = run([flag]);

      expect(printed).toContain("USAGE:");
      expect(printed).toContain("OPTIONS:");
      expect(printed).toContain("--help, -h");
    });
  }

  it("should show error for missing yaml file path", () => {
    const printed = run(["generate"]);

    expect(printed).toContain("Error: YAML file path is required");
  });

  it("should show error for non-existent yaml file", () => {
    const printed = run(["generate", "non-existent.yaml"]);

    expect(printed).toContain("Error: Could not read YAML file");
  });

  it("should show error for invalid yaml content", () => {
    withTmpDir((tempDir: string) => {
      const yamlFile = path.join(tempDir, 'invalid.yaml');
      fs.writeFileSync(yamlFile, `invalid: yaml: content: [}`);

      const printed = run(["generate", yamlFile]);

      expect(printed).toContain("Error: Could not parse YAML content");
    });
  });

  it("should show error for missing required fields", () => {
    withTmpDir((tempDir: string) => {
      const yamlFile = path.join(tempDir, 'missing-fields.yaml');
      fs.writeFileSync(yamlFile, `some_field: "some value"`);

      const printed = run(["generate", yamlFile]);

      expect(printed).toContain("Error: Invalid YAML structure");
      expect(printed).toContain("Missing required 'title' or 'text' fields");
    });
  });

  it("should show unknown command message", () => {
    const printed = run(["unknown"]);

    expect(printed).toContain("Unknown command");
    expect(printed).toContain("Available commands: generate");
  });
});

// Markdown generation from inline YAML fixtures of increasing complexity.
describe("CLI generate basic markdown", () => {
  // Writes the YAML fixture as walkthrough.yaml inside the temp dir.
  const writeYaml = (dir: string, yamlText: string): void => {
    fs.writeFileSync(path.join(dir, 'walkthrough.yaml'), yamlText);
  };

  // Runs `generate` on the temp dir's walkthrough.yaml and returns the console output.
  const generate = (dir: string) =>
    withMockedConsole(() => {
      cli(["generate", path.join(dir, "walkthrough.yaml")]);
    });

  it("should generate basic markdown", () => {
    withTmpDir((tempDir: string) => {
      writeYaml(
        tempDir,
        `title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"`
      );

      const printed = generate(tempDir);

      // Without targets, the output defaults to walkthrough.md next to the YAML.
      const markdownFile = path.join(tempDir, 'walkthrough.md');
      expect(fs.existsSync(markdownFile)).toBe(true);
      const rendered = fs.readFileSync(markdownFile, 'utf8');
      expect(rendered).toContain("# setting up a typescript cli");
      expect(rendered).toContain("this is a walkthrough for setting up a typescript cli");
      expect(printed).toContain("Successfully generated walkthrough");
    });
  });

  it("should generate markdown with a section", () => {
    withTmpDir((tempDir: string) => {
      writeYaml(
        tempDir,
        `title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"
sections:
  - title: "Installation"
    text: "First, let's install the necessary dependencies"`
      );

      const printed = generate(tempDir);

      const markdownFile = path.join(tempDir, 'walkthrough.md');
      expect(fs.existsSync(markdownFile)).toBe(true);
      const rendered = fs.readFileSync(markdownFile, 'utf8');
      expect(rendered).toContain("# setting up a typescript cli");
      expect(rendered).toContain("this is a walkthrough for setting up a typescript cli");
      expect(rendered).toContain("## Installation");
      expect(rendered).toContain("First, let's install the necessary dependencies");
      expect(printed).toContain("Successfully generated walkthrough");
    });
  });

  it("should generate markdown with sections and steps", () => {
    withTmpDir((tempDir: string) => {
      fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });

      writeYaml(
        tempDir,
        `title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"
targets:
  - markdown: "./build/walkthrough.md"
    onChange:
      diff: true
      cp: true
sections:
  - name: setup
    title: "Initial Setup"
    steps:
      - text: "Create package.json"
        file: {src: ./walkthrough/00-package.json, dest: package.json}
      - text: "Install dependencies"
        command: |
          npm install
        results:
          - text: "You should see packages being installed"
            code: |
              added 123 packages`
      );

      // Source file referenced by the first step; its indentation is part of the expectation below.
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/00-package.json'),
        `{
          "name": "walkthroughgen",
          "version": "1.0.0",
          "description": "A CLI tool for generating walkthroughs",
          "dependencies": {
            "typescript": "^5.0.0"
          }
        }`
      );

      const printed = generate(tempDir);

      const markdownFile = path.join(tempDir, 'build/walkthrough.md');
      expect(fs.existsSync(markdownFile)).toBe(true);
      const rendered = fs.readFileSync(markdownFile, 'utf8').replace(/\r\n/g, '\n');
      const expectedBody = `
# setting up a typescript cli

this is a walkthrough for setting up a typescript cli

## Initial Setup

Create package.json

    cp ./walkthrough/00-package.json package.json

<details>
<summary>show file</summary>

\`\`\`json
// ./walkthrough/00-package.json
{
          "name": "walkthroughgen",
          "version": "1.0.0",
          "description": "A CLI tool for generating walkthroughs",
          "dependencies": {
            "typescript": "^5.0.0"
          }
        }
\`\`\`

</details>

Install dependencies

    npm install

You should see packages being installed

    added 123 packages
    `.trim();
      expect(rendered).toContain(expectedBody);
      expect(printed).toContain("Successfully generated walkthrough");
    });
  });
});

// Runs the generator against a copy of the bundled TypeScript example.
describe("CLI generate from example", () => {
  it("should generate markdown from the typescript example", () => {
    withTmpDir((tempDir: string) => {
      const exampleRoot = path.resolve(__dirname, '../../examples/typescript');

      // Stage the example in the temp dir: the YAML file plus its walkthrough/ sources.
      const stagedYaml = path.join(tempDir, 'walkthrough.yaml');
      fs.copyFileSync(path.join(exampleRoot, 'walkthrough.yaml'), stagedYaml);
      fs.cpSync(
        path.join(exampleRoot, 'walkthrough'),
        path.join(tempDir, 'walkthrough'),
        { recursive: true }
      );

      const printed = withMockedConsole(() => {
        cli(["generate", stagedYaml]);
      });

      // The example's target writes to build/walkthrough.md.
      const generatedFile = path.join(tempDir, 'build/walkthrough.md');
      expect(fs.existsSync(generatedFile)).toBe(true);
      expect(printed).toContain("Successfully generated walkthrough");

      const rendered = fs.readFileSync(generatedFile, 'utf8').replace(/\r\n/g, '\n');
      const expectedFragments = [
        "# setting up a typescript cli",
        "## Copy initial files",
        "cp ./walkthrough/00-package.json package.json",
      ];
      for (const fragment of expectedFragments) {
        expect(rendered).toContain(fragment);
      }
    });
  });
});

// Overwriting a previously written destination must render a diff instead of a full preview.
describe("CLI generate with diffs", () => {
  it("should show diffs when files are overwritten", () => {
    withTmpDir((tempDir: string) => {
      const sourcesDir = path.join(tempDir, 'walkthrough');
      fs.mkdirSync(sourcesDir, { recursive: true });

      // Version 1: the initial package.json
      const packageV1 = `{
  "name": "example",
  "version": "1.0.0",
  "dependencies": {
    "typescript": "^5.0.0"
  }
}`;
      fs.writeFileSync(path.join(sourcesDir, 'v1-package.json'), packageV1);

      // Version 2: same file with one extra dependency
      const packageV2 = `{
  "name": "example",
  "version": "1.0.0",
  "dependencies": {
    "typescript": "^5.0.0",
    "express": "^4.18.0"
  }
}`;
      fs.writeFileSync(path.join(sourcesDir, 'v2-package.json'), packageV2);

      // Two sections writing to the same destination, with diffs enabled
      const yamlFile = path.join(tempDir, 'walkthrough.yaml');
      fs.writeFileSync(
        yamlFile,
        `title: "Test Diff Generation"
text: "Testing diff generation for file updates"
targets:
  - markdown: "./walkthrough.md"
    onChange:
      diff: true
      cp: true
sections:
  - title: "Initial Setup"
    steps:
      - text: "Create initial package.json"
        file: {src: ./walkthrough/v1-package.json, dest: package.json}
  - title: "Add Express"
    steps:
      - text: "Add express dependency"
        file: {src: ./walkthrough/v2-package.json, dest: package.json}`
      );

      const printed = withMockedConsole(() => {
        cli(["generate", yamlFile]);
      });

      const markdownFile = path.join(tempDir, 'walkthrough.md');
      expect(fs.existsSync(markdownFile)).toBe(true);
      const rendered = fs.readFileSync(markdownFile, 'utf8').replace(/\r\n/g, '\n');

      // First write of package.json: new file, so a cp line and full preview, no diff
      expect(rendered).toContain("Create initial package.json");
      expect(rendered).toContain("cp ./walkthrough/v1-package.json package.json");
      expect(rendered).toContain(`<details>
<summary>show file</summary>

\`\`\`json
// ./walkthrough/v1-package.json
{
  "name": "example",
  "version": "1.0.0",
  "dependencies": {
    "typescript": "^5.0.0"
  }
}
\`\`\`

</details>`);

      // Second write of package.json: an update, so a diff plus a collapsed cp shortcut
      expect(rendered).toContain("Add express dependency");
      expect(rendered).toContain("```diff\npackage.json\n   \"version\": \"1.0.0\",\n   \"dependencies\": {\n-    \"typescript\": \"^5.0.0\"\n+    \"typescript\": \"^5.0.0\",\n+    \"express\": \"^4.18.0\"\n   }\n }");
      expect(rendered).toContain(`<details>
<summary>skip this step</summary>

    cp ./walkthrough/v2-package.json package.json

</details>`);

      expect(printed).toContain("Successfully generated walkthrough");
    });
  });
});

describe("CLI generate with folders target", () => {
  // Smallest folders-target case: a single section with no steps.
  // Only the base output directory and the success message are asserted.
  it("should create base folders directory", () => {
    withTmpDir((tempDir: string) => {
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough.yaml'),
        `title: "Test Folders"
text: "Testing folders target"
targets:
  - folders: { path: "./build/by-section" }
sections:
  - title: "First Section"
    text: "First section text"`
      );

      const output = withMockedConsole(() => {
        cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
      });

      // The folders path from the YAML is expected to resolve under tempDir
      expect(fs.existsSync(path.join(tempDir, 'build/by-section'))).toBe(true);
      expect(output).toContain("Successfully generated walkthrough");
    });
  });

  // A named section is expected to produce a folder "<2-digit index>-<name>"
  // containing a README.md built from the section's title and text.
  it("should create first section folder with README", () => {
    withTmpDir((tempDir: string) => {
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough.yaml'),
        `title: "Test Folders"
text: "Testing folders target"
targets:
  - folders: { path: "./build/by-section" }
sections:
  - name: first-section
    title: "First Section"
    text: "First section text"`
      );

      // Console output is captured only to keep the test run quiet; it is not asserted here
      const output = withMockedConsole(() => {
        cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
      });

      const sectionPath = path.join(tempDir, 'build/by-section/00-first-section');
      expect(fs.existsSync(sectionPath)).toBe(true);
      expect(fs.existsSync(path.join(sectionPath, 'README.md'))).toBe(true);

      // Check README content
      const readmeContent = fs.readFileSync(path.join(sectionPath, 'README.md'), 'utf8');
      expect(readmeContent).toContain("# First Section");
      expect(readmeContent).toContain("First section text");
    });
  });

  // A section with one file step: the step's source file must be staged inside the
  // section folder (under walkthrough/), but the step itself must not be applied there.
  it("should copy files to the section's working directory", () => {
    withTmpDir((tempDir: string) => {
      // Create source file
      fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/file.ts'),
        'console.log("hello");'
      );

      // Create walkthrough.yaml
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough.yaml'),
        `title: "Test Folders"
text: "Testing folders target"
targets:
  - folders: { path: "./build/by-section" }
sections:
  - name: first-section
    title: "First Section"
    text: "First section text"
    steps:
      - text: "Add a file"
        file: {src: ./walkthrough/file.ts, dest: src/file.ts}`
      );

      // Console output is captured only to keep the test run quiet; it is not asserted here
      const output = withMockedConsole(() => {
        cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
      });

      // Check source file was copied to section's walkthrough directory
      const sectionPath = path.join(tempDir, 'build/by-section/00-first-section');
      expect(fs.existsSync(path.join(sectionPath, 'walkthrough/file.ts'))).toBe(true);

      // Check file was NOT copied to its destination within the section
      // (section folders only contain state BEFORE their own steps)
      expect(fs.existsSync(path.join(sectionPath, 'src/file.ts'))).toBe(false);

      // Check README includes the step
      const readmeContent = fs.readFileSync(path.join(sectionPath, 'README.md'), 'utf8');
      expect(readmeContent).toContain("Add a file");
      expect(readmeContent).toContain("cp ./walkthrough/file.ts src/file.ts");
    });
  });

  // Two sections, one file step each: section N's folder must hold the results of
  // sections 0..N-1 but never the results of its own steps.
  it("should include files from previous sections", () => {
    withTmpDir((tempDir: string) => {
      // Create source files
      fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/file1.ts'),
        'console.log("hello 1");'
      );
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/file2.ts'),
        'console.log("hello 2");'
      );

      // Create walkthrough.yaml with two sections
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough.yaml'),
        `title: "Test Folders"
text: "Testing folders target"
targets:
  - folders: { path: "./build/by-section" }
sections:
  - name: first-section
    title: "First Section"
    text: "First section text"
    steps:
      - text: "Add first file"
        file: {src: ./walkthrough/file1.ts, dest: src/file1.ts}
  - name: second-section
    title: "Second Section"
    text: "Second section text"
    steps:
      - text: "Add second file"
        file: {src: ./walkthrough/file2.ts, dest: src/file2.ts}`
      );

      // Console output is captured only to keep the test run quiet; it is not asserted here
      const output = withMockedConsole(() => {
        cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
      });

      // Check first section does NOT have its own file
      // (section folders only contain state BEFORE their own steps)
      const firstSectionPath = path.join(tempDir, 'build/by-section/00-first-section');
      expect(fs.existsSync(path.join(firstSectionPath, 'src/file1.ts'))).toBe(false);

      // Check second section has first section's file but NOT its own file
      const secondSectionPath = path.join(tempDir, 'build/by-section/01-second-section');
      expect(fs.existsSync(path.join(secondSectionPath, 'src/file1.ts'))).toBe(true);
      expect(fs.existsSync(path.join(secondSectionPath, 'src/file2.ts'))).toBe(false);

      // Check READMEs: each section's README documents that section's own step
      const firstReadme = fs.readFileSync(path.join(firstSectionPath, 'README.md'), 'utf8');
      expect(firstReadme).toContain("Add first file");
      expect(firstReadme).toContain("cp ./walkthrough/file1.ts src/file1.ts");

      const secondReadme = fs.readFileSync(path.join(secondSectionPath, 'README.md'), 'utf8');
      expect(secondReadme).toContain("Add second file");
      expect(secondReadme).toContain("cp ./walkthrough/file2.ts src/file2.ts");
    });
  });

  // Regression-style scenario for the folders target combining: a skipped section,
  // `dir` creation, a file that a LATER section overwrites (src/index.ts), and a
  // `final` folder. The key check is that a section folder never contains content
  // introduced by its own or later sections.
  it("should correctly generate section folders with dir creation and specific file content", () => {
    withTmpDir((tempDir: string) => {
      // --- Setup source files ---
      fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
      
      // package.json for hello-world section
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/00-package.json'),
        JSON.stringify({ name: "hello-world-pkg", dependencies: {} }, null, 2)
      );
      // tsconfig.json for hello-world section
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/00-tsconfig.json'),
        JSON.stringify({ compilerOptions: { target: "esnext" } }, null, 2)
      );

      // This is the content EXPECTED in hello-world/src/index.ts
      const expectedHelloWorldIndexContent = 'console.log("hello, world!"); // Simple version';
      // The YAML for hello-world section will point to this source file.
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/01-index.ts'), // As per user's YAML for hello-world
        expectedHelloWorldIndexContent
      );

      // This is the content that the user sees INCORRECTLY appearing in hello-world/src/index.ts.
      // This file won't be directly referenced by the hello-world section in this test's YAML.
      // If this content appears, it means something is wrong with file sourcing or cumulative logic.
      const cliIndexContent = 'import { cli } from "./cli"; cli(); // CLI version';
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/02-index.ts'), // A different file
        cliIndexContent
      );

      const cliTSContent = 'export function cli() { console.log("cli"); }';
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/02-cli.ts'), // A different file
        cliTSContent
      );

      // --- Setup walkthrough.yaml ---
      const walkthroughYamlContent = `
title: "Test Folders Feature"
text: "Testing dir creation and file content isolation between sections."
targets:
  - folders:
      path: "./build/sections"
      skip:
        - "cleanup"
      final:
        dirName: "final"
      
sections:
  - name: cleanup
    title: "Cleanup Section"
    steps:
      - text: "Simulate cleanup (command is illustrative, not run by folders target)"
        command: "rm -rf src/"
  - name: hello-world
    title: "Hello World Section"
    steps:
      - text: "Copy package.json"
        file: {src: ./walkthrough/00-package.json, dest: package.json}
      - text: "Copy tsconfig.json"
        file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json}
      - text: "Create src folder"
        dir: {create: true, path: src}
      - text: "Add simple hello world index.ts"
        file: {src: ./walkthrough/01-index.ts, dest: src/index.ts} # Points to expectedHelloWorldIndexContent
  - name: cli-version # A subsequent section
    title: "CLI Version Section"
    steps:
      - text: "add a CLI"
        file: {src: ./walkthrough/02-cli.ts, dest: src/cli.ts} # adds src/cli.ts
      - text: "Update index.ts to CLI version"
        file: {src: ./walkthrough/02-index.ts, dest: src/index.ts} # Overwrites src/index.ts
  - name: runnable
    title: "run the cli"
    steps:
      - text: "run the cli"
        command: "npx tsx src/index.ts"
`;
      fs.writeFileSync(path.join(tempDir, 'walkthrough.yaml'), walkthroughYamlContent);

      // --- Run CLI ---
      // (console is not mocked here, so the CLI's output goes to the real console)
      cli(["generate", path.join(tempDir, "walkthrough.yaml")]);

      // --- Assertions ---
      // The expected paths encode the numbering this test relies on: the skipped
      // "cleanup" section does not consume an index, so hello-world is 00 and
      // cli-version is 01.
      const cleanupSectionPath = path.join(tempDir, 'build/sections/00-cleanup');
      const helloWorldSectionPath = path.join(tempDir, 'build/sections/00-hello-world');
      const cliSectionPath = path.join(tempDir, 'build/sections/01-cli-version');
      const finalSectionPath = path.join(tempDir, 'build/sections/final');

      //
      // Cleanup Section
      //
      // "cleanup" is listed under the folders target's `skip`, so no folder should exist for it
      expect(fs.existsSync(cleanupSectionPath)).toBe(false);

      //
      // Hello World Section
      //
      // Assert hello-world section - this should have the results of the previous step (NOTHING)
      expect(fs.existsSync(helloWorldSectionPath)).toBe(true);
      // Check src/, package.json and tsconfig.json don't exist yet
      expect(fs.existsSync(path.join(helloWorldSectionPath, 'src'))).toBe(false); 
      expect(fs.existsSync(path.join(helloWorldSectionPath, 'package.json'))).toBe(false);
      expect(fs.existsSync(path.join(helloWorldSectionPath, 'tsconfig.json'))).toBe(false);
      

      //
      // CLI Section
      //
      // The cli section should contain the results of the hello-world section
      const packageJSONPath = path.join(cliSectionPath, 'package.json');
      const tsconfigJSONPath = path.join(cliSectionPath, 'tsconfig.json');
      const indexTSPath = path.join(cliSectionPath, 'src/index.ts');

      expect(fs.existsSync(packageJSONPath)).toBe(true); 
      expect(fs.existsSync(tsconfigJSONPath)).toBe(true);
      expect(fs.existsSync(indexTSPath)).toBe(true);
      const packageJSONContent = fs.readFileSync(packageJSONPath, 'utf8');
      expect(packageJSONContent).toContain("hello-world-pkg");
      const tsconfigJSONContent = fs.readFileSync(tsconfigJSONPath, 'utf8');
      expect(tsconfigJSONContent).toContain("\"target\": \"esnext\"");
      // index.ts here must still be the simple hello-world version, not the CLI version
      const indexTSContent = fs.readFileSync(indexTSPath, 'utf8');
      expect(indexTSContent).toContain("console.log(\"hello, world!\");");

      //
      // Final Section
      //
      // the final folder, marked by "final: dirName: final" should contain all the files from the last section
      expect(fs.existsSync(finalSectionPath)).toBe(true);
      expect(fs.existsSync(path.join(finalSectionPath, 'src/index.ts'))).toBe(true);
      expect(fs.existsSync(path.join(finalSectionPath, 'src/cli.ts'))).toBe(true);
      expect(fs.existsSync(path.join(finalSectionPath, 'package.json'))).toBe(true);
      expect(fs.existsSync(path.join(finalSectionPath, 'tsconfig.json'))).toBe(true);
      // Verify the final index.ts is the CLI version (the overwrite from cli-version was applied)
      const finalIndexContent = fs.readFileSync(path.join(finalSectionPath, 'src/index.ts'), 'utf8');
      expect(finalIndexContent).toContain(cliIndexContent);
      const finalCliContent = fs.readFileSync(path.join(finalSectionPath, 'src/cli.ts'), 'utf8');
      expect(finalCliContent).toContain(cliTSContent);
    });
  });

  // Command steps marked `incremental: true` are expected to actually run for the
  // folders target, with their side effects (files written via shell redirection)
  // carried forward into later section folders and the final folder.
  // NOTE(review): the commands rely on shell `echo ... > file` redirection — confirm
  // this test is only expected to run where such a shell is available.
  it("should execute commands in the working directory for folders target", () => {
    withTmpDir((tempDir: string) => {
      // Create walkthrough.yaml
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough.yaml'),
        `title: "Test Command Execution in Folders"
text: "Testing command execution"
targets:
  - folders:
      path: "./build/cmd-test"
      final:
        dirName: "final-cmd"
sections:
  - name: section-with-command
    title: "Section with Command"
    steps:
      - text: "Create a file via command"
        command: "echo 'command content' > command_file.txt"
        incremental: true
  - name: next-section
    title: "Next Section"
    steps:
      - text: "Another step"
        command: "echo 'another' > another_file.txt"
        incremental: true`
      );

      // Run CLI
      cli(["generate", path.join(tempDir, "walkthrough.yaml")]);

      // Assertions
      const firstSectionPath = path.join(tempDir, 'build/cmd-test/00-section-with-command');
      const secondSectionPath = path.join(tempDir, 'build/cmd-test/01-next-section');
      const finalPath = path.join(tempDir, 'build/cmd-test/final-cmd');

      // First section should NOT have its own command's file
      expect(fs.existsSync(path.join(firstSectionPath, 'command_file.txt'))).toBe(false);

      // Second section SHOULD have first section's command's file
      expect(fs.existsSync(path.join(secondSectionPath, 'command_file.txt'))).toBe(true);
      // But should NOT have its own command's file
      expect(fs.existsSync(path.join(secondSectionPath, 'another_file.txt'))).toBe(false);

      // Final folder should have both files
      expect(fs.existsSync(path.join(finalPath, 'command_file.txt'))).toBe(true);
      expect(fs.existsSync(path.join(finalPath, 'another_file.txt'))).toBe(true);

      // Check file contents (trimmed, since echo appends a trailing newline)
      const commandFileContent = fs.readFileSync(path.join(secondSectionPath, 'command_file.txt'), 'utf8').trim();
      expect(commandFileContent).toBe('command content');
      const finalAnotherFileContent = fs.readFileSync(path.join(finalPath, 'another_file.txt'), 'utf8').trim();
      expect(finalAnotherFileContent).toBe('another');
    });
  });

  // Contrasts the three spellings of a command step — no `incremental` key,
  // `incremental: true`, and `incremental: false` — across both targets:
  // every command must appear in the markdown, but only the `incremental: true`
  // one may leave a file behind in the folders output.
  it("should handle incremental commands correctly", () => {
    withTmpDir((tempDir: string) => {
      // Create walkthrough.yaml
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough.yaml'),
        `title: "Test Incremental Commands"
text: "Testing incremental command behavior"
targets:
  - markdown: "./walkthrough.md"
  - folders:
      path: "./build/cmd-test"
      final:
        dirName: "final"
sections:
  - name: section-with-commands
    title: "Section with Commands"
    steps:
      - text: "Regular command (not executed in folders, shown in MD)"
        command: "echo 'regular command' > regular.txt"
      - text: "Incremental command (executed in folders, shown in MD)"
        command: "echo 'incremental command' > incremental.txt"
        incremental: true
      - text: "Another regular command (not executed in folders, shown in MD)"
        command: "echo 'another regular' > another_regular.txt"
        incremental: false`
      );

      // Run CLI
      cli(["generate", path.join(tempDir, "walkthrough.yaml")]);

      // Check markdown output - ALL commands should be in markdown
      const markdownContent = fs.readFileSync(path.join(tempDir, 'walkthrough.md'), 'utf8');
      expect(markdownContent).toContain("echo 'regular command' > regular.txt");
      expect(markdownContent).toContain("echo 'incremental command' > incremental.txt");
      expect(markdownContent).toContain("echo 'another regular' > another_regular.txt");

      // Check folders output - only incremental commands should have run
      const finalPath = path.join(tempDir, 'build/cmd-test/final');
      expect(fs.existsSync(path.join(finalPath, 'regular.txt'))).toBe(false);
      expect(fs.existsSync(path.join(finalPath, 'incremental.txt'))).toBe(true);
      expect(fs.existsSync(path.join(finalPath, 'another_regular.txt'))).toBe(false);

      // Check file contents for incremental command (trimmed, since echo appends a newline)
      const incrementalContent = fs.readFileSync(path.join(finalPath, 'incremental.txt'), 'utf8').trim();
      expect(incrementalContent).toBe('incremental command');
    });
  });

  // Folders-target counterpart of the markdown diff test: per-section READMEs should
  // show a brand-new file as a collapsed "show file" block and an overwritten file
  // as a diff plus a collapsed "skip this step" copy command.
  it("should generate section READMEs with diffs and show file blocks", () => {
    withTmpDir((tempDir: string) => {
      // Create source files (v2 is v1 plus one extra line, so the diff is a single addition)
      fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/v1-index.ts'),
        'console.log("hello");'
      );
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough/v2-index.ts'),
        'console.log("hello");\nconsole.log("world");'
      );

      // Create walkthrough.yaml
      fs.writeFileSync(
        path.join(tempDir, 'walkthrough.yaml'),
        `title: "Test Section README Diffs"
text: "Testing section README diff generation"
targets:
  - folders:
      path: "./build/sections"
      final:
        dirName: "final"
    onChange:
      diff: true
      cp: true
    newFiles:
      cat: false
      cp: true
sections:
  - name: first-section
    title: "First Section"
    text: "First section text"
    steps:
      - text: "Add initial index.ts"
        file: {src: ./walkthrough/v1-index.ts, dest: src/index.ts}
  - name: second-section
    title: "Second Section"
    text: "Second section text"
    steps:
      - text: "Update index.ts"
        file: {src: ./walkthrough/v2-index.ts, dest: src/index.ts}`
      );

      // Run CLI
      cli(["generate", path.join(tempDir, "walkthrough.yaml")]);

      // Check first section README: new file => cp command plus collapsed file listing
      const firstSectionPath = path.join(tempDir, 'build/sections/00-first-section');
      const firstReadme = fs.readFileSync(path.join(firstSectionPath, 'README.md'), 'utf8');
      expect(firstReadme).toContain("Add initial index.ts");
      expect(firstReadme).toContain("cp ./walkthrough/v1-index.ts src/index.ts");
      expect(firstReadme).toContain("<details>\n<summary>show file</summary>");
      expect(firstReadme).toContain("```ts\n// ./walkthrough/v1-index.ts");
      expect(firstReadme).toContain('console.log("hello");');

      // Check second section README: overwritten file => diff plus collapsed cp command
      const secondSectionPath = path.join(tempDir, 'build/sections/01-second-section');
      const secondReadme = fs.readFileSync(path.join(secondSectionPath, 'README.md'), 'utf8');
      expect(secondReadme).toContain("Update index.ts");
      expect(secondReadme).toContain("```diff\nsrc/index.ts\n+console.log(\"world\");");
      expect(secondReadme).toContain("<details>\n<summary>skip this step</summary>");
      expect(secondReadme).toContain("cp ./walkthrough/v2-index.ts src/index.ts");
    });
  });
});

================================================
FILE: packages/walkthroughgen/test/utils/console-mock.ts
================================================
/**
 * Runs `callback` while console.log and console.error are swapped for recorders,
 * then puts the real methods back (also when `callback` throws).
 * @param callback The function to execute while the console is mocked
 * @returns All recorded messages in call order, one per line; the arguments of a
 *          single call are joined with a single space
 */
export const withMockedConsole = (callback: () => void): string => {
  const recorded: string[] = [];
  const saved = { log: console.log, error: console.error };

  // log and error share one recorder, so their messages interleave in call order
  const record = (...parts: any[]): void => {
    recorded.push(parts.join(" "));
  };

  console.log = record;
  console.error = record;

  try {
    callback();
  } finally {
    console.log = saved.log;
    console.error = saved.error;
  }

  return recorded.join("\n");
};

================================================
FILE: packages/walkthroughgen/test/utils/temp-dir.ts
================================================
import { mkdtempSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';

/**
 * Runs `fn` against a freshly created scratch directory and removes that directory
 * afterwards, whether `fn` returns normally or throws.
 * The directory is created alongside this file, with a name that starts with `.tmptest`.
 * @param fn callback receiving the scratch directory's path
 * @returns whatever `fn` returns
 */
export function withTmpDir<T>(fn: (dir: string) => T): T {
  const prefix = join(__dirname, '.tmptest');
  const scratchDir = mkdtempSync(prefix);
  try {
    const outcome = fn(scratchDir);
    return outcome;
  } finally {
    // force: true keeps cleanup from throwing if fn already removed the directory
    rmSync(scratchDir, { recursive: true, force: true });
  }
}


================================================
FILE: packages/walkthroughgen/tsconfig.json
================================================

{
    "compilerOptions": {
      "target": "es2016",
      "module": "commonjs",
      "esModuleInterop": true,
      "forceConsistentCasingInFileNames": true,
      "strict": true,
      "skipLibCheck": true
    },
    "exclude": ["node_modules", "dist", "**/walkthrough/**"]
  }
  

================================================
FILE: workshops/.gitignore
================================================
baml_client/


================================================
FILE: workshops/.python-version
================================================
3.11


================================================
FILE: workshops/2025-05/.gitignore
================================================
build/


================================================
FILE: workshops/2025-05/Makefile
================================================
# Remove the generated build/ output directory.
.PHONY: clean
clean:
	rm -rf build/

# Regenerate the walkthrough outputs from walkthrough.yaml by running the
# walkthroughgen CLI entry point (src/index.ts) through tsx.
# Depends on `clean`, so build/ is always removed first.
# NOTE(review): `npm -C <dir>` presumably points npm at the walkthroughgen package so
# its installed tsx is used, while walkthrough.yaml is still read from this directory — confirm.
.PHONY: generate
generate: clean
	npm -C ../../packages/walkthroughgen/ \
	  exec tsx \
	  ../../packages/walkthroughgen/src/index.ts \
	  generate walkthrough.yaml	
  

================================================
FILE: workshops/2025-05/final/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/final/baml_src/agent.baml
================================================
// Human tools are async requests to a human: either a request for more
// information (ClarificationRequest) or a message that the agent is done for now (DoneForNow).
type HumanTools = ClarificationRequest | DoneForNow

// Step the model emits (intent "request_more_information") when it needs more
// input from the human; `message` is the text sent to them.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step the model emits (intent "done_for_now") when it has finished the current
// work; `message` reports what was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Asks the model ("openai/gpt-4o") to choose the next step for a thread.
// `thread` is the already-serialized event history as a single string.
// Returns one of the HumanTools or CalculatorTools variants; the `intent`
// field of the result identifies which one.
// The prompt's tail asks the model to write its reasoning as bullets before
// emitting the schema-shaped output.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting carries no task, so the model is expected to ask for more information.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single, well-formed multiplication request should map straight to the multiply tool.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Multi-step request whose thread already contains every tool call and result
// (multiply -> 12, divide -> 6, add -> 18). With nothing left to compute, the
// model is expected to finish with done_for_now and mention the answer 18.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// The second operand is deliberately garbled, so the model is expected to ask
// for clarification instead of guessing a number.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Continuation of the clarification flow: the garbled operand has been queried
// and the human answered "12", so the model should now call multiply with a=3, b=12.
// Each assert label names the field it checks, so a failure report points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/final/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI gpt-4o client; reads its key from the OPENAI_API_KEY environment variable.
// No retry policy is attached.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI gpt-4o-mini client using the Exponential retry policy declared in this file;
// reads its key from the OPENAI_API_KEY environment variable.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client pinned to the claude-3-5-sonnet-20241022 model;
// reads its key from the ANTHROPIC_API_KEY environment variable. No retry policy is attached.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client pinned to the claude-3-haiku-20240307 model, using the Constant
// retry policy declared in this file; reads its key from ANTHROPIC_API_KEY.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two smaller models
// (one OpenAI, one Anthropic), so both API keys must be set to use it.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that prefers gpt-4o-mini and falls back to gpt-4o.
// The two entries must be different clients: listing the same client twice only
// repeats the failed call (CustomGPT4oMini already retries via its own retry_policy).
// Both entries use OPENAI_API_KEY, so no additional credentials are required.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4o]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy: up to 3 retries with a fixed 200 ms delay. Used by CustomHaiku.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy: up to 2 retries with exponential backoff — 300 ms base delay,
// multiplier 1.5, capped at 10000 ms. Used by CustomGPT4oMini.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/final/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/final/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tool calls the model may choose; each variant is
// distinguished by its literal `intent` value.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call with intent "add" and two numeric operands, `a` and `b`.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call with intent "subtract" and two numeric operands, `a` and `b`.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call with intent "multiply" and two numeric operands, `a` and `b`.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call with intent "divide" and two numeric operands, `a` and `b`.
// The schema places no constraint on `b`, so a zero divisor is representable.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/final/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "express": "^5.1.0",
        "humanlayer": "^0.7.7",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/express": "^5.0.1",
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0",
        "supertest": "^7.1.0"
    }
}


================================================
FILE: workshops/2025-05/final/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * A single entry in a thread's event history.
 * `type` labels the event (this file pushes "tool_call" and "tool_response" events);
 * `data` is the payload: a tool-call object carrying an `intent`, or a plain value
 * such as a numeric tool result.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered event log for one agent conversation, with helpers to render it as
 * prompt text and to inspect the most recent event.
 */
export class Thread {
    events: Event[] = [];

    /** @param events initial history; the array is stored by reference, not copied */
    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins the results with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strips leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as an XML-like block.
     * The tag is the payload's `intent` when it has one, otherwise the event's `type`.
     * Object payloads become one `key: value` line per key (the `intent` key is
     * omitted because it is already the tag); any other payload is printed as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so null must be excluded explicitly before
        // Object.keys() is called; a null payload is printed as the text "null".
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /**
     * True when the latest event is a message addressed to the human
     * (request_more_information or done_for_now).
     * Returns false for an empty thread or a payload without an intent.
     */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /**
     * True when the latest event is a `divide` tool call, which the agent loop
     * hands back for human approval instead of executing.
     * Returns false for an empty thread or a payload without an intent.
     */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}

/** Any of the four arithmetic tool calls that handleNextStep can execute. */
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * Computes `a <op> b` for the step's intent, logs the value, and appends a
 * `tool_response` event whose data is the numeric result. No zero check is
 * performed for `divide`; the result follows normal JavaScript number semantics.
 *
 * @param nextStep the calculator tool call to run
 * @param thread   the thread to append the result to (mutated in place)
 * @returns the same `thread` instance, after the result has been appended
 * @throws Error (as a rejected promise) when `nextStep.intent` is not one of the
 *         four calculator intents, instead of silently resolving to `undefined`
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed input; guards against malformed runtime data.
            throw new Error(`unknown calculator intent: ${String((nextStep as { intent?: unknown }).intent)}`);
    }

    // Recording the result is identical for every operation, so it is done once here.
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Drive the agent until it produces a step that needs a human.
 *
 * Each iteration asks the model for the next step, records it on the thread
 * as a `tool_call` event, and then either returns the thread (final answer,
 * clarification request, or a `divide` awaiting approval) or executes the
 * tool and continues.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({ "type": "tool_call", "data": nextStep });

        // done_for_now / request_more_information are replies to the human;
        // divide is scary, so it is handed back for human approval
        if (
            nextStep.intent === "done_for_now" ||
            nextStep.intent === "request_more_information" ||
            nextStep.intent === "divide"
        ) {
            return thread;
        }

        // the remaining tools are safe to run immediately
        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/final/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command line entry point: builds a thread from the arguments, runs the
 * agent, and keeps relaying questions to a human until the agent is done.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script; the rest is the message
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole argument list becomes the first user_input event
    const message = words.join(" ");
    const thread = new Thread([{ type: "user_input", data: message }]);

    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events[newThread.events.length - 1];

    // any stop other than done_for_now means the agent needs a human
    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        thread.events.push(responseEvent);
        newThread = await agentLoop(thread);
        lastEvent = newThread.events[newThread.events.length - 1];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Route a question to a human: over email when HUMANLAYER_API_KEY is set,
 * otherwise through an interactive terminal prompt.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    const useEmail = Boolean(process.env.HUMANLAYER_API_KEY);
    return useEmail
        ? await askHumanEmail(lastEvent)
        : await askHumanCLI(lastEvent.data.message);
}

/**
 * Prompt the user on the terminal and wrap the typed answer in an event.
 *
 * @param message - text shown above the `> ` prompt
 * @returns a `human_response` event whose data is the line the user typed
 */
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // Release the interface once we have the answer; a new one is
            // created per question, so leaving it open leaks one per prompt.
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contact a human by email through HumanLayer and convert the reply into an
 * event for the thread.
 *
 * - `request_more_information`: sends the agent's message and returns the
 *   reply as a `human_response` event (the same event type the CLI prompt
 *   and the HTTP server use for human replies).
 * - `divide`: asks for approval; when approved the division is performed and
 *   returned as a `tool_response`, otherwise the denial and feedback are
 *   returned as a `tool_response`.
 *
 * Both HumanLayer calls block until the human replies.
 *
 * @param lastEvent - the tool_call event the agent loop stopped on
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not handled here
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
<br><br>
reply to this email to approve
{% endif %}`
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // a human's answer, not the output of a tool: label it like the
        // other human-reply paths do
        return {
            "type": "human_response",
            "data": response
        }
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            // kept on one line so source indentation does not leak into the
            // text the model reads
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-05/final/src/index.ts
================================================
import { cli } from "./cli"

/** Prints the hello-world greeting (kept from chapter 0; not called by main). */
async function hello(): Promise<void> {
    const greeting = 'hello, world!';
    console.log(greeting);
}

/** Program entry point: hands control to the command line interface. */
async function main(): Promise<void> {
    await cli();
}

main().catch(console.error)

================================================
FILE: workshops/2025-05/final/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// POST /thread - Start new thread
//
// Body: { "message": string }. Stores a new thread, runs the agent loop until
// it stops, and responds with the thread plus its id. The last event is
// tagged with a response_url the client can post the next human reply to.
app.post('/thread', async (req, res, next) => {
    // Reject unusable input up front so the agent never starts from an
    // `undefined` user_input event.
    const message = req.body?.message;
    if (typeof message !== 'string' || message.trim() === '') {
        return res.status(400).json({
            error: "Invalid request: 'message' must be a non-empty string"
        });
    }

    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);

        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // Forward failures (e.g. the model call rejecting) to Express's error
        // handling instead of leaving an unhandled promise rejection.
        next(err);
    }
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or a 404 JSON error for an unknown id.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});


type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

type ResponsePayload = {
    type: "response";
    response: string;
}

type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle a human reply to a paused thread
//
// Accepts either a clarification ({ type: "response", response }) or an
// approval decision ({ type: "approval", approved, comment? }). The payload
// must match what the thread is currently waiting for, otherwise a 400 is
// returned. On success the agent loop resumes and the updated thread is sent.
app.post('/thread/:id/response', async (req, res, next) => {
    try {
        const thread = store.get(req.params.id);
        if (!thread) {
            return res.status(404).json({ error: "Thread not found" });
        }

        const body: Payload = req.body;

        let lastEvent = thread.events[thread.events.length - 1];

        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment}"`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const newThread = await agentLoop(thread);

        store.update(req.params.id, newThread);

        lastEvent = newThread.events[newThread.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(newThread);
    } catch (err) {
        // Forward failures from the tool or the agent loop to Express's error
        // handling instead of leaving an unhandled promise rejection.
        next(err);
    }
});

// Listen on $PORT when it is set (and non-empty), otherwise on 3000.
const port = process.env.PORT || 3000;
app.listen(port, function onListening() {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/final/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';


// In-memory thread registry keyed by a generated UUID.
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
export class ThreadStore {
    private threads = new Map<string, Thread>();

    /** Save `thread` under a freshly generated UUID and return that id. */
    create(thread: Thread): string {
        const freshId = crypto.randomUUID();
        this.threads.set(freshId, thread);
        return freshId;
    }

    /** Look up a thread by id; yields `undefined` when the id is unknown. */
    get(id: string): Thread | undefined {
        const match = this.threads.get(id);
        return match;
    }

    /** Store `thread` under `id`, replacing whatever was there before. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}

================================================
FILE: workshops/2025-05/final/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/00-hello-world/README.md
================================================
# Chapter 0 - Hello World

Let's start with a basic TypeScript setup and a hello world program.

This guide is written in TypeScript (yes, a python version is coming soon)

There are many checkpoints between the file edits in the workshop steps,
so even if you aren't super familiar with TypeScript,
you should be able to keep up and run each example.

To run this guide, you'll need a relatively recent version of nodejs and npm installed

You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine


    brew install node@20

Verify the install; this should print the node version

    node --version

Copy initial package.json

    cp ./walkthrough/00-package.json package.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-package.json
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
```

</details>

Install dependencies

    npm install

Copy tsconfig.json

    cp ./walkthrough/00-tsconfig.json tsconfig.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-tsconfig.json
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
```

</details>

add .gitignore

    cp ./walkthrough/00-.gitignore .gitignore

<details>
<summary>show file</summary>

```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```

</details>

Create src folder

    mkdir -p src

Add a simple hello world index.ts

    cp ./walkthrough/00-index.ts src/index.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/00-index.ts
async function hello(): Promise<void> {
    console.log('hello, world!')
}

async function main() {
    await hello()
}

main().catch(console.error)
```

</details>

Run it to verify

    npx tsx src/index.ts

You should see:

    hello, world!


================================================
FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-index.ts
================================================
/** Prints the hello-world greeting to stdout. */
async function hello(): Promise<void> {
    const greeting = 'hello, world!';
    console.log(greeting);
}

/** Program entry point: runs the hello-world greeting. */
async function main(): Promise<void> {
    await hello();
}

main().catch(console.error)

================================================
FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
  

================================================
FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/README.md
================================================
# Chapter 1 - CLI and Agent Loop

Now let's add BAML and create our first agent with a CLI interface.

First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.


    npm install @boundaryml/baml

Initialize BAML

    npx baml-cli init

Remove default resume.baml

    rm baml_src/resume.baml

Add our starter agent, a single baml prompt that we'll build on

    cp ./walkthrough/01-agent.baml baml_src/agent.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/01-agent.baml
class DoneForNow {
  intent "done_for_now"
  message string 
}

function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
```

</details>

Generate BAML client code

    npx baml-cli generate

Enable BAML logging for this section

    export BAML_LOG=debug

Add the CLI interface

    cp ./walkthrough/01-cli.ts src/cli.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}
```

</details>

Update index.ts to use the CLI

```diff
src/index.ts
+import { cli } from "./cli"
+
 async function hello(): Promise<void> {
     console.log('hello, world!')
 
 async function main() {
-    await hello()
+    await cli()
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/01-index.ts src/index.ts

</details>

Add the agent implementation

    cp ./walkthrough/01-agent.ts src/agent.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
    type: string
    data: any;
}

export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        return JSON.stringify(this.events);
    }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    return nextStep;
}
```

</details>

The BAML code is configured to use OPENAI_API_KEY by default

As you're testing, you can change the model / provider to something else
as you please

        client "openai/gpt-4o"

[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)

For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) 
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.

If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key.


    export OPENAI_API_KEY=...

Try it out

    npx tsx src/index.ts hello

you should see a familiar response from the model

    {
      intent: 'done_for_now',
      message: 'Hello! How can I assist you today?'
    }


================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
  

================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/src/index.ts
================================================
/** Prints the hello-world greeting to stdout. */
async function hello(): Promise<void> {
    const greeting = 'hello, world!';
    console.log(greeting);
}

/** Program entry point: runs the hello-world greeting. */
async function main(): Promise<void> {
    await hello();
}

main().catch(console.error)

================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-agent.baml
================================================
// The step the model returns when it has a message for the user.
// `intent` is the fixed literal "done_for_now"; `message` is the reply text.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Asks the model what to do next for a thread.
// `thread` is the serialized event history, inserted verbatim into the user
// section of the prompt; the function is declared to return DoneForNow.
function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    // model/provider used for this function; can be swapped for another client
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Test case: runs DetermineNextStep on a thread holding a single
// user_input event whose data is "hello!".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-agent.ts
================================================
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

/** One entry in a thread's history. */
export interface Event {
    // label for the kind of event, e.g. "user_input"
    type: string
    // payload; intentionally untyped - its shape depends on the event type
    data: any;
}

/** An ordered list of events that forms the agent's context. */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        // the array is stored by reference, not copied
        this.events = events;
    }

    /** Serialize the whole event list to the string handed to the LLM. */
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const asJson = JSON.stringify(this.events);
        return asJson;
    }
}

// Runs a single turn with the LLM for now: serialize the thread, ask for the
// next step, and return it. Later chapters grow this into the full agent loop.
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const serialized = thread.serializeForLLM();
    return await b.DetermineNextStep(serialized);
}


================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

/**
 * Command line entry point: turns the arguments into a one-event thread,
 * runs the agent on it, and prints whatever the agent returns.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script; the rest is the message
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole argument list becomes the initial user_input event
    const message = words.join(" ");
    const thread = new Thread([{ type: "user_input", data: message }]);

    const result = await agentLoop(thread);
    console.log(result);
}


================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-index.ts
================================================
import { cli } from "./cli"

/** Prints the hello-world greeting (kept from chapter 0; not called by main). */
async function hello(): Promise<void> {
    const greeting = 'hello, world!';
    console.log(greeting);
}

/** Program entry point: hands control to the command line interface. */
async function main(): Promise<void> {
    await cli();
}

main().catch(console.error)

================================================
FILE: workshops/2025-05/sections/02-calculator-tools/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/02-calculator-tools/README.md
================================================
# Chapter 2 - Add Calculator Tools

Let's add some calculator tools to our agent.

Let's start by adding a tool definition for the calculator

These are simple structured outputs that we'll ask the model to 
return as a "next step" in the agentic loop.


    cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/02-tool_calculator.baml
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


class AddTool {
    intent "add"
    a int | float
    b int | float
}

class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

class DivideTool {
    intent "divide"
    a int | float
    b int | float
}
```

</details>

Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps


```diff
baml_src/agent.baml
 function DetermineNextStep(
     thread: string 
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
     client "openai/gpt-4o"
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/02-agent.baml baml_src/agent.baml

</details>

Generate updated BAML client

    npx baml-cli generate

Try out the calculator

    npx tsx src/index.ts 'can you add 3 and 4'

You should see a tool call to the calculator

    {
      intent: 'add',
      a: 3,
      b: 4
    }


================================================
FILE: workshops/2025-05/sections/02-calculator-tools/baml_src/agent.baml
================================================
// The step the model returns when it has a message for the user.
// `intent` is the fixed literal "done_for_now"; `message` is the reply text.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Asks the model what to do next for a thread.
// `thread` is the serialized event history, inserted verbatim into the user
// section of the prompt. At this point the only declared return type is
// DoneForNow.
function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    // model/provider used for this function; can be swapped for another client
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Test case: runs DetermineNextStep on a thread holding a single
// user_input event whose data is "hello!".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05/sections/02-calculator-tools/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI gpt-4o client; the API key is read from OPENAI_API_KEY.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI gpt-4o-mini client using the Exponential retry policy defined in
// this file; the API key is read from OPENAI_API_KEY.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic claude-3-5-sonnet client; the API key is read from
// ANTHROPIC_API_KEY.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic claude-3-haiku client using the Constant retry policy defined in
// this file; the API key is read from ANTHROPIC_API_KEY.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two cheaper models.
// Needs both OPENAI_API_KEY and ANTHROPIC_API_KEY, since it uses one client
// from each provider.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that tries each listed client in order.
// NOTE(review): both entries are CustomGPT4oMini, so a failure "falls back"
// to the very same client - confirm whether a different second client
// (e.g. CustomGPT4o) was intended.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy used by CustomHaiku: up to 3 retries with a fixed 200 ms delay.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy used by CustomGPT4oMini: up to 2 retries with exponential
// backoff, configured with a 300 ms delay, a 1.5 multiplier and a 10 s cap.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/02-calculator-tools/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
// Code generator settings for the TypeScript client the agent imports.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    // NOTE(review): "../" presumably puts the generated client at the project
    // root, matching the `../baml_client` import in src/agent.ts - confirm.
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/02-calculator-tools/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0"
    }
}


================================================
FILE: workshops/2025-05/sections/02-calculator-tools/src/agent.ts
================================================
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

/** One entry in a thread's history. */
export interface Event {
    // label for the kind of event, e.g. "user_input"
    type: string
    // payload; intentionally untyped - its shape depends on the event type
    data: any;
}

/** An ordered list of events that forms the agent's context. */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        // the array is stored by reference, not copied
        this.events = events;
    }

    /** Serialize the whole event list to the string handed to the LLM. */
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const asJson = JSON.stringify(this.events);
        return asJson;
    }
}

// Runs a single turn with the LLM for now: serialize the thread, ask for the
// next step, and return it. Later chapters grow this into the full agent loop.
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const serialized = thread.serializeForLLM();
    return await b.DetermineNextStep(serialized);
}


================================================
FILE: workshops/2025-05/sections/02-calculator-tools/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

/**
 * Command line entry point: turns the arguments into a one-event thread,
 * runs the agent on it, and prints whatever the agent returns.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script; the rest is the message
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole argument list becomes the initial user_input event
    const message = words.join(" ");
    const thread = new Thread([{ type: "user_input", data: message }]);

    const result = await agentLoop(thread);
    console.log(result);
}


================================================
FILE: workshops/2025-05/sections/02-calculator-tools/src/index.ts
================================================
import { cli } from "./cli"

// Prints a greeting. Not called by main() in this file.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Program entry point: run the command-line interface to completion.
async function main() {
    await cli()
}

// Log any failure AND mark the process as failed. A bare
// `.catch(console.error)` handles the rejection, so node would otherwise
// exit with status 0 and shells/CI could not tell the run went wrong.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})

================================================
FILE: workshops/2025-05/sections/02-calculator-tools/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/02-calculator-tools/walkthrough/02-agent.baml
================================================
// Final step of a turn: the model is finished (for now) and `message` is its
// reply to the human. `intent` is a literal field that tags this variant.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Asks the model what to do next.
//   thread  - the conversation so far, already serialized to a string
//   returns - either one of the calculator tool calls (CalculatorTools) or
//             DoneForNow; the `intent` field tells the variants apart
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single greeting event. No assertions are
// attached to this test.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05/sections/02-calculator-tools/walkthrough/02-tool_calculator.baml
================================================
// Every calculator tool call the model may pick from. Each variant carries a
// literal `intent` tag plus two numeric operands, `a` and `b`.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call tagged "add"; operands may each be an int or a float.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call tagged "subtract"; operands may each be an int or a float.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call tagged "multiply"; operands may each be an int or a float.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call tagged "divide"; operands may each be an int or a float.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/03-tool-loop/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/03-tool-loop/README.md
================================================
# Chapter 3 - Process Tool Calls in a Loop

Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.

First, let's update the agent to handle the tool call


```diff
src/agent.ts
 }
 
-// right now this just runs one turn with the LLM, but
-// we'll update this function to handle all the agent logic
-export async function agentLoop(thread: Thread): Promise<AgentResponse> {
-    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
-    return nextStep;
+
+
+export async function agentLoop(thread: Thread): Promise<string> {
+
+    while (true) {
+        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
+        console.log("nextStep", nextStep);
+
+        switch (nextStep.intent) {
+            case "done_for_now":
+                // response to human, return the next step object
+                return nextStep.message;
+            case "add":
+                thread.events.push({
+                    "type": "tool_call",
+                    "data": nextStep
+                });
+                const result = nextStep.a + nextStep.b;
+                console.log("tool_response", result);
+                thread.events.push({
+                    "type": "tool_response",
+                    "data": result
+                });
+                continue;
+            default:
+                throw new Error(`Unknown intent: ${nextStep.intent}`);
+        }
+    }
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03-agent.ts src/agent.ts

</details>

Now, let's try it out


    npx tsx src/index.ts 'can you add 3 and 4'

you should see the agent call the tool and then return the result

    {
      intent: 'done_for_now',
      message: 'The sum of 3 and 4 is 7.'
    }

For the next step, we'll do a more complex calculation. Let's turn off the BAML logs for more concise output

    export BAML_LOG=off

Try a multi-step calculation

    npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'

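you'll notice that tools like multiply and divide are not available yet: the loop only handles `add`, so the following request should fail with an `Unknown intent` error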

    npx tsx src/index.ts 'can you multiply 3 and 4'

next, let's add handlers for the rest of the calculator tools


```diff
src/agent.ts
-import { b } from "../baml_client";
+import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
 
-// tool call or a respond to human tool
-type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
-
 export interface Event {
     type: string
 }
 
+export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
 
+export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
+    let result: number;
+    switch (nextStep.intent) {
+        case "add":
+            result = nextStep.a + nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "subtract":
+            result = nextStep.a - nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "multiply":
+            result = nextStep.a * nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "divide":
+            result = nextStep.a / nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+    }
+}
 
 export async function agentLoop(thread: Thread): Promise<string> {
         console.log("nextStep", nextStep);
 
+        thread.events.push({
+            "type": "tool_call",
+            "data": nextStep
+        });
+
         switch (nextStep.intent) {
             case "done_for_now":
                 return nextStep.message;
             case "add":
-                thread.events.push({
-                    "type": "tool_call",
-                    "data": nextStep
-                });
-                const result = nextStep.a + nextStep.b;
-                console.log("tool_response", result);
-                thread.events.push({
-                    "type": "tool_response",
-                    "data": result
-                });
-                continue;
-            default:
-                throw new Error(`Unknown intent: ${nextStep.intent}`);
+            case "subtract":
+            case "multiply":
+            case "divide":
+                thread = await handleNextStep(nextStep, thread);
         }
     }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03b-agent.ts src/agent.ts

</details>

Test subtraction

    npx tsx src/index.ts 'can you subtract 3 from 4'

now, let's test the multiplication tool


    npx tsx src/index.ts 'can you multiply 3 and 4'

finally, let's test a more complex calculation with multiple operations


    npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'


================================================
FILE: workshops/2025-05/sections/03-tool-loop/baml_src/agent.baml
================================================
// Final step of a turn: the model is finished (for now) and `message` is its
// reply to the human. `intent` is a literal field that tags this variant.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Asks the model what to do next.
//   thread  - the conversation so far, already serialized to a string
//   returns - either one of the calculator tool calls (CalculatorTools) or
//             DoneForNow; the `intent` field tells the variants apart
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single greeting event. No assertions are
// attached to this test.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05/sections/03-tool-loop/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI gpt-4o; API key is read from the OPENAI_API_KEY environment variable.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI gpt-4o-mini, retried according to the Exponential policy defined below.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic Claude 3.5 Sonnet; API key is read from ANTHROPIC_API_KEY.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic Claude 3 Haiku, retried according to the Constant policy defined below.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// NOTE(review): both strategy entries are the same client, so the "fallback"
// re-tries CustomGPT4oMini rather than switching to a different model - confirm
// this is intended.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Up to 3 retries with a fixed 200 ms delay between attempts.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Up to 2 retries; delay starts at 300 ms, grows by 1.5x, capped at 10 s.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/03-tool-loop/baml_src/generators.baml
================================================
// This helps you auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/).
    // "../" targets the project root; src/agent.ts imports the client from
    // "../baml_client" and .gitignore excludes baml_client/.
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/03-tool-loop/baml_src/tool_calculator.baml
================================================
// Every calculator tool call the model may pick from. Each variant carries a
// literal `intent` tag plus two numeric operands, `a` and `b`.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call tagged "add"; operands may each be an int or a float.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call tagged "subtract"; operands may each be an int or a float.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call tagged "multiply"; operands may each be an int or a float.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call tagged "divide"; operands may each be an int or a float.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/03-tool-loop/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0"
    }
}


================================================
FILE: workshops/2025-05/sections/03-tool-loop/src/agent.ts
================================================
import { b } from "../baml_client";

// Whatever b.DetermineNextStep resolves to: either a tool call or a
// respond-to-human ("done_for_now") step.
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// One entry in a Thread's history.
export interface Event {
    // Kind of entry, e.g. "user_input" for the message passed on the command line.
    type: string
    // Payload for the entry; its shape depends on `type`, hence `any`.
    data: any;
}

/**
 * The running history of one agent conversation, kept as a list of events.
 *
 * The array handed to the constructor is stored by reference (not copied).
 */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Renders the event list as the string that is sent to the model.
     *
     * Plain JSON today; swap in any custom format (XML, etc.) here, e.g.
     * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM(): string {
        const { events } = this;
        return JSON.stringify(events);
    }
}

/**
 * Runs a single model turn: serializes the thread, asks the model for the
 * next step, and returns that step unchanged.
 *
 * Placeholder for now; this chapter grows it into the full agent loop.
 */
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const serializedThread = thread.serializeForLLM();
    return await b.DetermineNextStep(serializedThread);
}


================================================
FILE: workshops/2025-05/sections/03-tool-loop/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

/**
 * Command-line front end for the agent.
 *
 * Treats every argument after the script name as one space-joined user
 * message, seeds a Thread with it, runs the agent loop and prints the result.
 * Exits with status 1 when no message is supplied.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script path; the rest is user input
    const [, , ...words] = process.argv;

    if (!words.length) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // The user's message becomes the first (and only) event on a fresh thread
    const initialEvent = { type: "user_input", data: words.join(" ") };
    const thread = new Thread([initialEvent]);

    const outcome = await agentLoop(thread);
    console.log(outcome);
}


================================================
FILE: workshops/2025-05/sections/03-tool-loop/src/index.ts
================================================
import { cli } from "./cli"

// Prints a greeting. Not called by main() in this file.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Program entry point: run the command-line interface to completion.
async function main() {
    await cli()
}

// Log any failure AND mark the process as failed. A bare
// `.catch(console.error)` handles the rejection, so node would otherwise
// exit with status 0 and shells/CI could not tell the run went wrong.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})

================================================
FILE: workshops/2025-05/sections/03-tool-loop/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/03-tool-loop/walkthrough/03-agent.ts
================================================
import { b } from "../baml_client";

// Whatever b.DetermineNextStep resolves to: either a tool call or a
// respond-to-human ("done_for_now") step.
// NOTE(review): agentLoop in this file returns Promise<string>, so this alias
// is no longer referenced here.
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// One entry in a Thread's history.
export interface Event {
    // Kind of entry; this file appends "tool_call" and "tool_response" events.
    type: string
    // Payload for the entry; its shape depends on `type`, hence `any`
    // (the model's step object for "tool_call", a number for "tool_response").
    data: any;
}

/**
 * The running history of one agent conversation, kept as a list of events.
 *
 * The array handed to the constructor is stored by reference (not copied).
 */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Renders the event list as the string that is sent to the model.
     *
     * Plain JSON today; swap in any custom format (XML, etc.) here, e.g.
     * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM(): string {
        const { events } = this;
        return JSON.stringify(events);
    }
}


/**
 * Keeps asking the model for the next step until it answers "done_for_now",
 * then resolves with that step's message.
 *
 * Only the "add" tool is executed here: the call and its numeric result are
 * appended to the thread before the next model turn. Any other intent
 * aborts the loop with an "Unknown intent" error.
 */
export async function agentLoop(thread: Thread): Promise<string> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const nextStep = await b.DetermineNextStep(serialized);
        console.log("nextStep", nextStep);

        if (nextStep.intent === "done_for_now") {
            // response to human: hand the message back to the caller
            return nextStep.message;
        }

        if (nextStep.intent !== "add") {
            throw new Error(`Unknown intent: ${nextStep.intent}`);
        }

        // Record what the model asked for, then what the tool produced
        thread.events.push({ type: "tool_call", data: nextStep });
        const sum = nextStep.a + nextStep.b;
        console.log("tool_response", sum);
        thread.events.push({ type: "tool_response", data: sum });
    }
}


================================================
FILE: workshops/2025-05/sections/03-tool-loop/walkthrough/03b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a Thread's history.
export interface Event {
    // Kind of entry; this file appends "tool_call" and "tool_response" events.
    type: string
    // Payload for the entry; its shape depends on `type`, hence `any`
    // (the model's step object for "tool_call", a number for "tool_response").
    data: any;
}

/**
 * The running history of one agent conversation, kept as a list of events.
 *
 * The array handed to the constructor is stored by reference (not copied).
 */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Renders the event list as the string that is sent to the model.
     *
     * Plain JSON today; swap in any custom format (XML, etc.) here, e.g.
     * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM(): string {
        const { events } = this;
        return JSON.stringify(events);
    }
}

// Any one of the four calculator tool-call shapes imported from ../baml_client;
// switching on `intent` selects the concrete variant.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its outcome on the thread.
 *
 * Appends exactly one "tool_response" event to `thread.events` and returns
 * the same (mutated) thread instance.
 *
 * @param nextStep calculator tool call chosen by the model
 * @param thread   conversation thread the result is appended to
 * @returns the thread that was passed in, with the tool response appended
 * @throws Error when `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Captured as a plain string up front so it can still be reported from the
    // `default` branch, where the type system has narrowed `nextStep` to `never`.
    const intent: string = nextStep.intent;

    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable per the types, but fail loudly instead of silently
            // resolving to undefined if malformed data ever arrives at runtime.
            throw new Error(`Unknown intent: ${intent}`);
    }

    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        // Thread.serializeForLLM uses JSON.stringify, which turns Infinity and
        // NaN into null. Send those as strings so a result such as 1 / 0 stays
        // visible to the model instead of disappearing.
        "data": Number.isFinite(result) ? result : String(result)
    });
    return thread;
}

/**
 * Keeps asking the model for the next step until it answers "done_for_now",
 * then resolves with that step's message.
 *
 * Every step the model returns is appended to the thread as a "tool_call"
 * event (the final done_for_now step included). Calculator steps are then
 * executed via handleNextStep, which appends the matching "tool_response".
 */
export async function agentLoop(thread: Thread): Promise<string> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const nextStep = await b.DetermineNextStep(serialized);
        console.log("nextStep", nextStep);

        thread.events.push({ type: "tool_call", data: nextStep });

        if (nextStep.intent === "done_for_now") {
            // response to human: hand the message back to the caller
            return nextStep.message;
        }

        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply" ||
            nextStep.intent === "divide"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/04-baml-tests/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/04-baml-tests/README.md
================================================
# Chapter 4 - Add Tests to agent.baml

Let's add some tests to our BAML agent.

to start, leave the baml logs enabled

    export BAML_LOG=debug

next, let's add some tests to the agent

We'll start with a simple test that checks the agent's ability to handle
a basic calculation.


```diff
baml_src/agent.baml
     "#
   }
+
+test MathOperation {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+      {
+        "type": "user_input",
+        "data": "can you multiply 3 and 4?"
+      }
+    "#
+  }
+}
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/04-agent.baml baml_src/agent.baml

</details>

Run the tests

    npx baml-cli test

now, let's improve the test with assertions!

Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.


```diff
baml_src/agent.baml
     "#
   }
+  @@assert(hello, {{this.intent == "done_for_now"}})
 }
 
     "#
   }
+  @@assert(math_operation, {{this.intent == "multiply"}})
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/04b-agent.baml baml_src/agent.baml

</details>

Run the tests

    npx baml-cli test

as you add more tests, you can disable the logs to keep the output clean. 
You may want to turn them on as you iterate on specific tests.


    export BAML_LOG=off

now, let's add some more complex test cases,
where we resume from the middle of an in-progress
agentic context window


```diff
baml_src/agent.baml
     "#
   }
-  @@assert(hello, {{this.intent == "done_for_now"}})
+  @@assert(intent, {{this.intent == "done_for_now"}})
 }
 
     "#
   }
-  @@assert(math_operation, {{this.intent == "multiply"}})
+  @@assert(intent, {{this.intent == "multiply"}})
 }
 
+test LongMath {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+      [
+        {
+          "type": "user_input",
+          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
+        },
+        {
+          "type": "tool_call",
+          "data": {
+            "intent": "multiply",
+            "a": 3,
+            "b": 4
+          }
+        },
+        {
+          "type": "tool_response",
+          "data": 12
+        },
+        {
+          "type": "tool_call", 
+          "data": {
+            "intent": "divide",
+            "a": 12,
+            "b": 2
+          }
+        },
+        {
+          "type": "tool_response",
+          "data": 6
+        },
+        {
+          "type": "tool_call",
+          "data": {
+            "intent": "add", 
+            "a": 6,
+            "b": 12
+          }
+        },
+        {
+          "type": "tool_response",
+          "data": 18
+        }
+      ]
+    "#
+  }
+  @@assert(intent, {{this.intent == "done_for_now"}})
+  @@assert(answer, {{"18" in this.message}})
+}
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/04c-agent.baml baml_src/agent.baml

</details>

let's try to run it


    npx baml-cli test


================================================
FILE: workshops/2025-05/sections/04-baml-tests/baml_src/agent.baml
================================================
// Final step of a turn: the model is finished (for now) and `message` is its
// reply to the human. `intent` is a literal field that tags this variant.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Asks the model what to do next.
//   thread  - the conversation so far, already serialized to a string
//   returns - either one of the calculator tool calls (CalculatorTools) or
//             DoneForNow; the `intent` field tells the variants apart
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single greeting event. No assertions are
// attached to this test.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05/sections/04-baml-tests/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI gpt-4o; API key is read from the OPENAI_API_KEY environment variable.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI gpt-4o-mini, retried according to the Exponential policy defined below.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic Claude 3.5 Sonnet; API key is read from ANTHROPIC_API_KEY.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic Claude 3 Haiku, retried according to the Constant policy defined below.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// NOTE(review): both strategy entries are the same client, so the "fallback"
// re-tries CustomGPT4oMini rather than switching to a different model - confirm
// this is intended.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Up to 3 retries with a fixed 200 ms delay between attempts.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Up to 2 retries; delay starts at 300 ms, grows by 1.5x, capped at 10 s.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/04-baml-tests/baml_src/generators.baml
================================================
// This helps you auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/).
    // "../" targets the project root; src/agent.ts imports the client from
    // "../baml_client" and .gitignore excludes baml_client/.
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/04-baml-tests/baml_src/tool_calculator.baml
================================================
// Every calculator tool call the model may pick from. Each variant carries a
// literal `intent` tag plus two numeric operands, `a` and `b`.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call tagged "add"; operands may each be an int or a float.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call tagged "subtract"; operands may each be an int or a float.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call tagged "multiply"; operands may each be an int or a float.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call tagged "divide"; operands may each be an int or a float.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/04-baml-tests/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0"
    }
}


================================================
FILE: workshops/2025-05/sections/04-baml-tests/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a Thread's history.
export interface Event {
    // Kind of entry; this file appends "tool_call" and "tool_response" events.
    type: string
    // Payload for the entry; its shape depends on `type`, hence `any`
    // (the model's step object for "tool_call", a number for "tool_response").
    data: any;
}

/**
 * The running history of one agent conversation, kept as a list of events.
 *
 * The array handed to the constructor is stored by reference (not copied).
 */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Renders the event list as the string that is sent to the model.
     *
     * Plain JSON today; swap in any custom format (XML, etc.) here, e.g.
     * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM(): string {
        const { events } = this;
        return JSON.stringify(events);
    }
}

// Any one of the four calculator tool-call shapes imported from ../baml_client;
// switching on `intent` selects the concrete variant.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its outcome on the thread.
 *
 * Appends exactly one "tool_response" event to `thread.events` and returns
 * the same (mutated) thread instance.
 *
 * @param nextStep calculator tool call chosen by the model
 * @param thread   conversation thread the result is appended to
 * @returns the thread that was passed in, with the tool response appended
 * @throws Error when `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Captured as a plain string up front so it can still be reported from the
    // `default` branch, where the type system has narrowed `nextStep` to `never`.
    const intent: string = nextStep.intent;

    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable per the types, but fail loudly instead of silently
            // resolving to undefined if malformed data ever arrives at runtime.
            throw new Error(`Unknown intent: ${intent}`);
    }

    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        // Thread.serializeForLLM uses JSON.stringify, which turns Infinity and
        // NaN into null. Send those as strings so a result such as 1 / 0 stays
        // visible to the model instead of disappearing.
        "data": Number.isFinite(result) ? result : String(result)
    });
    return thread;
}

/**
 * Keeps asking the model for the next step until it answers "done_for_now",
 * then resolves with that step's message.
 *
 * Every step the model returns is appended to the thread as a "tool_call"
 * event (the final done_for_now step included). Calculator steps are then
 * executed via handleNextStep, which appends the matching "tool_response".
 */
export async function agentLoop(thread: Thread): Promise<string> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const nextStep = await b.DetermineNextStep(serialized);
        console.log("nextStep", nextStep);

        thread.events.push({ type: "tool_call", data: nextStep });

        if (nextStep.intent === "done_for_now") {
            // response to human: hand the message back to the caller
            return nextStep.message;
        }

        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply" ||
            nextStep.intent === "divide"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/04-baml-tests/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

/**
 * Command-line front end for the agent.
 *
 * Treats every argument after the script name as one space-joined user
 * message, seeds a Thread with it, runs the agent loop and prints the result.
 * Exits with status 1 when no message is supplied.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script path; the rest is user input
    const [, , ...words] = process.argv;

    if (!words.length) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // The user's message becomes the first (and only) event on a fresh thread
    const initialEvent = { type: "user_input", data: words.join(" ") };
    const thread = new Thread([initialEvent]);

    const outcome = await agentLoop(thread);
    console.log(outcome);
}


================================================
FILE: workshops/2025-05/sections/04-baml-tests/src/index.ts
================================================
import { cli } from "./cli"

// Prints a greeting. Not exported and not called anywhere in this file.
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Entry point: hands control to the command-line interface.
async function main() {
    await cli()
}

// Report any failure AND mark the process as failed. Logging alone would let
// the process end with status 0, hiding the error from shells and scripts.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})

================================================
FILE: workshops/2025-05/sections/04-baml-tests/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/04-baml-tests/walkthrough/04-agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// free-form message string.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Given the serialized thread, asks the "openai/gpt-4o" client what to do next.
// The declared return type is a union: one of the CalculatorTools variants or
// DoneForNow.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a thread holding a single "hello!" user_input event.
// No assertions are attached to this test.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

// Runs DetermineNextStep on a thread whose only event asks to multiply 3 and 4.
// No assertions are attached to this test.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
}


================================================
FILE: workshops/2025-05/sections/04-baml-tests/walkthrough/04b-agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// free-form message string.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Given the serialized thread, asks the "openai/gpt-4o" client what to do next.
// The declared return type is a union: one of the CalculatorTools variants or
// DoneForNow.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single "hello!" user_input event and asserts
// (under the label "hello") that the returned intent is "done_for_now".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(hello, {{this.intent == "done_for_now"}})
}

// Runs DetermineNextStep on a request to multiply 3 and 4 and asserts (under
// the label "math_operation") that the returned intent is "multiply".
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(math_operation, {{this.intent == "multiply"}})
}


================================================
FILE: workshops/2025-05/sections/04-baml-tests/walkthrough/04c-agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// free-form message string.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Given the serialized thread, asks the "openai/gpt-4o" client what to do next.
// The declared return type is a union: one of the CalculatorTools variants or
// DoneForNow.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single "hello!" user_input event and asserts
// that the returned intent is "done_for_now".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

// Runs DetermineNextStep on a request to multiply 3 and 4 and asserts that the
// returned intent is "multiply".
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Replays a finished multi-step calculation: the fixture thread already holds
// the multiply, divide and add tool calls with their responses (12, 6, 18).
// Asserts that the next step is "done_for_now" and that its message contains "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/05-human-tools/README.md
================================================
# Chapter 5 - Multiple Human Tools

In this section, we'll add support for multiple tools that serve to 
contact humans.


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

first, let's add a tool that can request clarification from a human 

this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.


```diff
baml_src/agent.baml
+// human tools are async requests to a human
+type HumanTools = ClarificationRequest | DoneForNow
+
+class ClarificationRequest {
+  intent "request_more_information" @description("you can request more information from me")
+  message string
+}
+
 class DoneForNow {
   intent "done_for_now"
-  message string 
+
+  message string @description(#"
+    message to send to the user about the work that was done. 
+  "#)
 }
 
 function DetermineNextStep(
     thread: string 
-) -> CalculatorTools | DoneForNow {
+) -> HumanTools | CalculatorTools {
     client "openai/gpt-4o"
 
 }
 
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05-agent.baml baml_src/agent.baml

</details>

next, let's re-generate the client code

NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.


    npx baml-cli generate

now, let's update the agent to use the new tool


```diff
src/agent.ts
 }
 
-export async function agentLoop(thread: Thread): Promise<string> {
+export async function agentLoop(thread: Thread): Promise<Thread> {
 
     while (true) {
         switch (nextStep.intent) {
             case "done_for_now":
-                // response to human, return the next step object
-                return nextStep.message;
+            case "request_more_information":
+                // response to human, return the thread
+                return thread;
             case "add":
             case "subtract":
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05-agent.ts src/agent.ts

</details>

next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI


```diff
src/cli.ts
 // cli.ts lets you invoke the agent loop from the command line
 
-import { agentLoop, Thread, Event } from "./agent";
+import { agentLoop, Thread, Event } from "../src/agent";
 
+
+
 export async function cli() {
     // Get command line arguments, skipping the first two (node and script name)
     // Run the agent loop with the thread
     const result = await agentLoop(thread);
-    console.log(result);
+    let lastEvent = result.events.slice(-1)[0];
+
+    while (lastEvent.data.intent === "request_more_information") {
+        const message = await askHuman(lastEvent.data.message);
+        thread.events.push({ type: "human_response", data: message });
+        const result = await agentLoop(thread);
+        lastEvent = result.events.slice(-1)[0];
+    }
+
+    // print the final result
+    // optional - you could loop here too
+    console.log(lastEvent.data.message);
+    process.exit(0);
 }
+
+async function askHuman(message: string) {
+    const readline = require('readline').createInterface({
+        input: process.stdin,
+        output: process.stdout
+    });
+
+    return new Promise((resolve) => {
+        readline.question(`${message}\n> `, (answer: string) => {
+            resolve(answer);
+        });
+    });
+}
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05-cli.ts src/cli.ts

</details>

let's try it out


    npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '

next, let's add a test that checks the agent's ability to handle
a clarification request


```diff
baml_src/agent.baml
 
 
+
+test MathOperationWithClarification {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+      "#
+  }
+  @@assert(intent, {{this.intent == "request_more_information"}})
+}
+
+test MathOperationPostClarification {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+        [
+        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
+        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
+        {"type":"human_response","data":"lets try 12 instead"},
+      ]
+      "#
+  }
+  @@assert(intent, {{this.intent == "multiply"}})
+  @@assert(a, {{this.b == 12}})
+  @@assert(b, {{this.a == 3}})
+}
+        
+
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05b-agent.baml baml_src/agent.baml

</details>

and now we can run the tests again


    npx baml-cli test

you'll notice the new test passes, but the hello world test fails

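This is because the hello world test still asserts the old default behavior ("done_for_now"). Now that the model has a clarification tool, it answers a bare "hello!" with "request_more_information" instead, so let's update the assertion to match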


```diff
baml_src/agent.baml
     "#
   }
-  @@assert(intent, {{this.intent == "done_for_now"}})
+  @@assert(intent, {{this.intent == "request_more_information"}})
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05c-agent.baml baml_src/agent.baml

</details>

Verify tests pass

    npx baml-cli test


================================================
FILE: workshops/2025-05/sections/05-human-tools/baml_src/agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// free-form message string.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Given the serialized thread, asks the "openai/gpt-4o" client what to do next.
// The declared return type is a union: one of the CalculatorTools variants or
// DoneForNow.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single "hello!" user_input event and asserts
// that the returned intent is "done_for_now".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

// Runs DetermineNextStep on a request to multiply 3 and 4 and asserts that the
// returned intent is "multiply".
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Replays a finished multi-step calculation: the fixture thread already holds
// the multiply, divide and add tool calls with their responses (12, 6, 18).
// Asserts that the next step is "done_for_now" and that its message contains "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client for the "gpt-4o" model; the API key is read from the
// OPENAI_API_KEY environment variable. No retry policy is attached.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client for the "gpt-4o-mini" model, using the Exponential retry
// policy declared further down in this file.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client for the "claude-3-5-sonnet-20241022" model; the API key is
// read from the ANTHROPIC_API_KEY environment variable. No retry policy is attached.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client for the "claude-3-haiku-20240307" model, using the Constant
// retry policy declared further down in this file.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// Round-robin client over the two smaller models declared in this file.
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// Fallback client: tries the listed clients in order.
// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    // NOTE(review): both entries are the same client, so a failure "falls back"
    // to the client that just failed — presumably a different second client
    // (e.g. CustomGPT4o) was intended; confirm before relying on this.
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// Retry policy: up to 3 retries with a fixed 200 ms delay between attempts.
// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy: up to 2 retries with exponential backoff, configured with a
// 300 ms base delay, a 1.5 multiplier, and a 10000 ms cap.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/05-human-tools/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    // NOTE(review): this section's package.json requests "@boundaryml/baml": "latest" —
    // confirm the installed package actually matches the version pinned here.
    version "0.202.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tool variants declared below.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call tagged with the literal intent "add"; a and b are its two numeric operands.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call tagged with the literal intent "subtract"; a and b are its two numeric operands.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call tagged with the literal intent "multiply"; a and b are its two numeric operands.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call tagged with the literal intent "divide"; a and b are its two numeric operands.
// The schema places no restriction on b (zero is allowed).
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "@boundaryml/baml": "latest",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0"
    }
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// A single entry in a Thread's history.
export interface Event {
    // event kind; this file pushes "tool_call" and "tool_response" events
    type: string
    // payload; untyped (this file stores tool-call objects and numeric tool results)
    data: any;
}

/**
 * Ordered history of everything that has happened in one agent conversation.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // the caller's array is kept as-is (no copy)
        this.events = events;
    }

    /**
     * Renders the event history as the string handed to the LLM.
     *
     * Plain JSON for now; any custom serialization (XML, etc.) can be swapped in here,
     * e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM(): string {
        const { events } = this;
        return JSON.stringify(events);
    }
}

// Union of the four generated calculator tool-call types imported from baml_client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * The arithmetic result is logged, appended to `thread.events` as a
 * "tool_response" event, and the same thread object is returned.
 *
 * NOTE: division is not guarded against b === 0; JavaScript yields Infinity or
 * NaN in that case, which JSON.stringify renders as null.
 *
 * @param nextStep the tool call chosen by the model (intent plus operands a, b)
 * @param thread   conversation history, mutated in place
 * @returns the thread that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // shared tail of every operation: log, record, hand the thread back
    const respond = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ type: "tool_response", data: value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return respond(nextStep.a + nextStep.b);
        case "subtract":
            return respond(nextStep.a - nextStep.b);
        case "multiply":
            return respond(nextStep.a * nextStep.b);
        case "divide":
            return respond(nextStep.a / nextStep.b);
    }
}

/**
 * Drives the agent until the model says it is done.
 *
 * Every pass serializes the thread, asks `b.DetermineNextStep` for the next
 * step, records that step on the thread as a "tool_call" event, and then either
 * returns the model's message ("done_for_now") or executes the requested
 * calculator tool through `handleNextStep` and goes around again.
 *
 * @param thread conversation history; events are appended to it in place
 * @returns the `message` carried by the "done_for_now" step
 */
export async function agentLoop(thread: Thread): Promise<string> {
    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        // record the model's decision before acting on it
        thread.events.push({ type: "tool_call", data: step });

        if (step.intent === "done_for_now") {
            // response to human: hand the message back to the caller
            return step.message;
        }

        switch (step.intent) {
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(step, thread);
                break;
        }
    }
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

/**
 * Command-line entry point: joins every argument after the script name into a
 * single user message, seeds a new Thread with it, runs the agent loop, and
 * prints whatever the loop returns.
 *
 * Prints an error and exits with status 1 when no message is given.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script path; the rest is the message
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the user's message becomes the first event on a fresh thread
    const userMessage = words.join(" ");
    const thread = new Thread([{ type: "user_input", data: userMessage }]);

    const answer = await agentLoop(thread);
    console.log(answer);
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/src/index.ts
================================================
import { cli } from "./cli"

// Prints a greeting. Not exported and not called anywhere in this file.
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Entry point: hands control to the command-line interface.
async function main() {
    await cli()
}

// Report any failure AND mark the process as failed. Logging alone would let
// the process end with status 0, hiding the error from shells and scripts.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})

================================================
FILE: workshops/2025-05/sections/05-human-tools/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05-agent.baml
================================================
// Human tools are async requests to a human: either a request for
// clarification or the final "done for now" message.
type HumanTools = ClarificationRequest | DoneForNow

// Output variant tagged with the literal intent "request_more_information";
// `message` holds the text to put to the human.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Output variant tagged with the literal intent "done_for_now"; `message`
// carries a described free-form string for the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Given the serialized thread, asks the "openai/gpt-4o" client what to do next.
// The declared return type is a union: one of the HumanTools variants
// (clarification request or done-for-now) or one of the CalculatorTools variants.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single "hello!" user_input event and asserts
// that the returned intent is "done_for_now".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

// Runs DetermineNextStep on a request to multiply 3 and 4 and asserts that the
// returned intent is "multiply".
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Replays a finished multi-step calculation: the fixture thread already holds
// the multiply, divide and add tool calls with their responses (12, 6, 18).
// Asserts that the next step is "done_for_now" and that its message contains "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// A single entry in a Thread's history.
export interface Event {
    // event kind; this file pushes "tool_call" and "tool_response" events
    type: string
    // payload; untyped (this file stores tool-call objects and numeric tool results)
    data: any;
}

/**
 * Ordered history of everything that has happened in one agent conversation.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // the caller's array is kept as-is (no copy)
        this.events = events;
    }

    /**
     * Renders the event history as the string handed to the LLM.
     *
     * Plain JSON for now; any custom serialization (XML, etc.) can be swapped in here,
     * e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM(): string {
        const { events } = this;
        return JSON.stringify(events);
    }
}

// Union of the four generated calculator tool-call types imported from baml_client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * The arithmetic result is logged, appended to `thread.events` as a
 * "tool_response" event, and the same thread object is returned.
 *
 * NOTE: division is not guarded against b === 0; JavaScript yields Infinity or
 * NaN in that case, which JSON.stringify renders as null.
 *
 * @param nextStep the tool call chosen by the model (intent plus operands a, b)
 * @param thread   conversation history, mutated in place
 * @returns the thread that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // shared tail of every operation: log, record, hand the thread back
    const respond = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ type: "tool_response", data: value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return respond(nextStep.a + nextStep.b);
        case "subtract":
            return respond(nextStep.a - nextStep.b);
        case "multiply":
            return respond(nextStep.a * nextStep.b);
        case "divide":
            return respond(nextStep.a / nextStep.b);
    }
}

/**
 * Drives the agent until the model wants to talk to a human.
 *
 * Every pass serializes the thread, asks `b.DetermineNextStep` for the next
 * step and records it on the thread as a "tool_call" event. For
 * "done_for_now" and "request_more_information" the thread is returned (the
 * step just recorded is its last event); calculator intents are executed via
 * `handleNextStep` and the loop continues.
 *
 * @param thread conversation history; events are appended to it in place
 * @returns the thread, ending with the human-facing tool call
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        // record the model's decision before acting on it
        thread.events.push({ type: "tool_call", data: step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        switch (step.intent) {
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(step, thread);
                break;
        }
    }
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";


/**
 * Command-line entry point with a clarification loop.
 *
 * Joins the arguments into one user message, runs the agent loop, and, while
 * the last event on the returned thread is a "request_more_information" call,
 * asks the human on the terminal, appends the reply as a "human_response"
 * event and runs the loop again. Finally prints the last event's message and
 * exits with status 0 (status 1 if no message was supplied).
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script path; the rest is the message
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the user's message becomes the first event on a fresh thread
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let outcome = await agentLoop(thread);
    let lastEvent = outcome.events[outcome.events.length - 1];

    while (lastEvent.data.intent === "request_more_information") {
        const reply = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: reply });
        outcome = await agentLoop(thread);
        lastEvent = outcome.events[outcome.events.length - 1];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Prints `message` followed by a "> " prompt on stdout and resolves with the
 * line the user types on stdin.
 *
 * The readline interface is closed as soon as the answer arrives. Without
 * that, every question leaves another interface attached to stdin, so repeated
 * clarification rounds pile up listeners on the same stream.
 *
 * @param message text shown to the human before the prompt
 * @returns the line entered by the human
 */
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // release stdin before handing the answer back
            rl.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05b-agent.baml
================================================
// Human tools are async requests to a human: either a request for
// clarification or the final "done for now" message.
type HumanTools = ClarificationRequest | DoneForNow

// Output variant tagged with the literal intent "request_more_information";
// `message` holds the text to put to the human.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Output variant tagged with the literal intent "done_for_now"; `message`
// carries a described free-form string for the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Given the serialized thread, asks the "openai/gpt-4o" client what to do next.
// The declared return type is a union: one of the HumanTools variants
// (clarification request or done-for-now) or one of the CalculatorTools variants.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single "hello!" user_input event and asserts
// that the returned intent is "done_for_now".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

// Runs DetermineNextStep on a request to multiply 3 and 4 and asserts that the
// returned intent is "multiply".
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Replays a finished multi-step calculation: the fixture thread already holds
// the multiply, divide and add tool calls with their responses (12, 6, 18).
// Asserts that the next step is "done_for_now" and that its message contains "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// Runs DetermineNextStep on a multiply request whose second operand is
// garbled ("feee9ff10") and asserts that the returned intent is
// "request_more_information".
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Replays a thread in which the model already asked for clarification and the
// human answered "lets try 12 instead"; the next step should multiply 3 by 12.
// Each assertion label names the field it checks, and the fixture is valid
// JSON (no trailing comma), matching what JSON.stringify produces for a thread.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05c-agent.baml
================================================
// Human tools are async requests to a human: either a request for
// clarification or the final "done for now" message.
type HumanTools = ClarificationRequest | DoneForNow

// Tool the model selects to ask the human for more information.
// `intent` is the literal discriminator "request_more_information";
// `message` carries the text of the question.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Tool the model selects when it wants to report back to the user.
// `intent` is the literal discriminator "done_for_now";
// `message` is the text to send to the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Picks the agent's next step from the conversation so far.
//   thread  - the conversation thread rendered as a string (the tests below pass JSON)
//   returns - one HumanTools or CalculatorTools variant; CalculatorTools is declared
//             outside this file
// The prompt sets a system role, switches to the user role, embeds the thread, and
// ends with ctx.output_format (where BAML injects the output schema instructions).
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Greeting with no task in it: the expected next step is a request_more_information
// tool call. Note the fixture is a single JSON object, not an array of events.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Single-step arithmetic request: the expected next step is the multiply tool.
// Only the intent is asserted; the operands are not checked here.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Multi-step request whose three tool calls (multiply, divide, add) and their
// responses are already recorded in the thread; the last tool_response is 18.
// The expected next step is done_for_now with "18" somewhere in the message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// Clarification case: the second operand ("feee9ff10") is not a usable number,
// so the expected next step is a request_more_information tool call.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Resume-after-clarification case: the thread already holds the garbled request,
// the model's request_more_information call, and the human's reply ("lets try 12 instead").
// The expected next step is multiply with a == 3 (from the original request) and
// b == 12 (from the human's reply). Each assert label names the field it checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/06-customize-prompt/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/06-customize-prompt/README.md
================================================
# Chapter 6 - Customize Your Prompt with Reasoning

In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.

this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)

for a deep dive on reasoning, see [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts) from AI That Works


for this section, it will be helpful to leave the baml logs enabled

    export BAML_LOG=debug

update the agent prompt to include a reasoning step


```diff
baml_src/agent.baml
 
         {{ ctx.output_format }}
+
+        First, always plan out what to do next, for example:
+
+        - ...
+        - ...
+        - ...
+
+        {...} // schema
     "#
 }
   @@assert(b, {{this.a == 3}})
 }
-        
-
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/06-agent.baml baml_src/agent.baml

</details>

generate the updated client

    npx baml-cli generate

now, you can try it out with a simple prompt


    npx tsx src/index.ts 'can you multiply 3 and 4'

you should see output from the baml logs showing the reasoning steps

#### optional challenge 

add a field to your tool output format that includes the reasoning steps in the output!


================================================
FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/agent.baml
================================================
// human tools are async requests to a human
// HumanTools is either a ClarificationRequest (ask the human for more input)
// or a DoneForNow (report back on the work that was done).
type HumanTools = ClarificationRequest | DoneForNow

// Tool the model selects to ask the human for more information.
// `intent` is the literal discriminator "request_more_information";
// `message` carries the text of the question.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Tool the model selects when it wants to report back to the user.
// `intent` is the literal discriminator "done_for_now";
// `message` is the text to send to the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Picks the agent's next step from the conversation so far.
//   thread  - the conversation thread rendered as a string (the tests below pass JSON)
//   returns - one HumanTools or CalculatorTools variant; CalculatorTools is declared
//             outside this file
// The prompt sets a system role, switches to the user role, embeds the thread, and
// ends with ctx.output_format (where BAML injects the output schema instructions).
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Greeting with no task in it: the expected next step is a request_more_information
// tool call. Note the fixture is a single JSON object, not an array of events.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Single-step arithmetic request: the expected next step is the multiply tool.
// Only the intent is asserted; the operands are not checked here.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Multi-step request whose three tool calls (multiply, divide, add) and their
// responses are already recorded in the thread; the last tool_response is 18.
// The expected next step is done_for_now with "18" somewhere in the message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// Clarification case: the second operand ("feee9ff10") is not a usable number,
// so the expected next step is a request_more_information tool call.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Resume-after-clarification case: the thread already holds the garbled request,
// the model's request_more_information call, and the human's reply ("lets try 12 instead").
// The expected next step is multiply with a == 3 (from the original request) and
// b == 12 (from the human's reply). Each assert label names the field it checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client for the "gpt-4o" model. The API key is read from the
// OPENAI_API_KEY environment variable. No retry policy is attached.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client for the "gpt-4o-mini" model, using the Exponential retry policy
// defined at the bottom of this file. The API key comes from OPENAI_API_KEY.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client pinned to "claude-3-5-sonnet-20241022". The API key is read
// from the ANTHROPIC_API_KEY environment variable. No retry policy is attached.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client pinned to "claude-3-haiku-20240307", using the Constant retry
// policy defined at the bottom of this file. The API key comes from ANTHROPIC_API_KEY.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two smaller models
// (CustomGPT4oMini and CustomHaiku), so both provider API keys are needed.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that walks the strategy list in order.
// NOTE(review): both entries are CustomGPT4oMini, so the "fallback" just tries the
// same client again - confirm whether a different second client was intended.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy used by CustomHaiku: max_retries 3 with a constant_delay
// strategy and delay_ms 200.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy used by CustomGPT4oMini: max_retries 2 with an exponential_backoff
// strategy (delay_ms 300, multiplier 1.5, max_delay_ms 10000).
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/generators.baml
================================================
// This helps you auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
// This generator emits an async TypeScript client one directory above baml_src/.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tools declared below; each variant is
// distinguished by its literal `intent` value.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Addition request: `intent` is the literal "add"; `a` and `b` are the
// numeric operands (int or float).
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Subtraction request: `intent` is the literal "subtract"; `a` and `b` are the
// numeric operands (int or float).
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Multiplication request: `intent` is the literal "multiply"; `a` and `b` are the
// numeric operands (int or float).
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Division request: `intent` is the literal "divide"; `a` and `b` are the
// numeric operands (int or float). Nothing in the schema forbids b == 0.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/06-customize-prompt/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0"
    }
}


================================================
FILE: workshops/2025-05/sections/06-customize-prompt/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * One entry in a Thread's history.
 * `type` labels the entry (this file pushes "tool_call" and "tool_response");
 * `data` is the untyped payload that goes with it.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered history of everything that has happened in one agent run.
 *
 * The array handed to the constructor is stored by reference (not copied), so
 * events pushed through `thread.events` are visible to whoever owns that array.
 */
export class Thread {
    events: Event[];

    constructor(initialEvents: Event[]) {
        this.events = initialEvents;
    }

    /** Render the whole history as the string that is sent to the model. */
    serializeForLLM() {
        // plain JSON for now; swap in any custom serialization here (XML, etc), see
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        return JSON.stringify(this.events);
    }
}

// Union of the four calculator tool shapes imported from the generated baml_client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its outcome on the thread.
 *
 * @param nextStep the calculator tool call chosen by the model (`intent`, `a`, `b`)
 * @param thread   the thread to append the result to (mutated in place)
 * @returns the same thread, with a "tool_response" event appended
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // shared tail of every arithmetic case: log, append the event, hand the thread back
    const record = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({
            "type": "tool_response",
            "data": value
        });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return record(nextStep.a + nextStep.b);
        case "subtract":
            return record(nextStep.a - nextStep.b);
        case "multiply":
            return record(nextStep.a * nextStep.b);
        case "divide":
            // no zero check: a zero divisor yields Infinity/NaN, as with plain JS division
            return record(nextStep.a / nextStep.b);
    }
}

/**
 * Drive the agent until it has something to say to a human.
 *
 * Each iteration asks the model for the next step, logs it, and appends it to the
 * thread as a "tool_call" event. Calculator intents are executed through
 * handleNextStep and the loop goes around again; "done_for_now" and
 * "request_more_information" end the loop and return the thread to the caller.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        thread.events.push({ "type": "tool_call", "data": step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // the model is addressing the human: stop and hand the thread back
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/06-customize-prompt/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";


/**
 * Command-line entry point: turn the arguments into a user message, run the agent,
 * and keep relaying clarification questions to the terminal until the agent
 * finishes with something other than "request_more_information".
 *
 * Exits the process with status 1 when no arguments were given, 0 otherwise.
 */
export async function cli() {
    // everything after `node <script>` is the user's message
    const argv = process.argv.slice(2);
    if (argv.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // seed the thread with the message as its first event
    const thread = new Thread([{ type: "user_input", data: argv.join(" ") }]);

    let lastEvent = (await agentLoop(thread)).events.slice(-1)[0];

    // as long as the agent is asking a question, get an answer and run it again
    while (lastEvent.data.intent === "request_more_information") {
        const reply = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: reply });
        lastEvent = (await agentLoop(thread)).events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Print `message` on the terminal and wait for the human to type one line.
 *
 * @param message the question to show; it is followed by a "> " prompt on the next line
 * @returns the line the human entered
 */
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Release stdin before resolving: cli() may call this once per clarification
            // round, and unclosed interfaces would pile up on the same stream.
            rl.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05/sections/06-customize-prompt/src/index.ts
================================================
import { cli } from "./cli"

// Prints a greeting. Nothing in this file calls it.
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Program entry point: hands control to the command-line runner from ./cli.
async function main() {
    await cli()
}

main().catch(console.error)

================================================
FILE: workshops/2025-05/sections/06-customize-prompt/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/06-customize-prompt/walkthrough/06-agent.baml
================================================
// human tools are async requests to a human
// HumanTools is either a ClarificationRequest (ask the human for more input)
// or a DoneForNow (report back on the work that was done).
type HumanTools = ClarificationRequest | DoneForNow

// Tool the model selects to ask the human for more information.
// `intent` is the literal discriminator "request_more_information";
// `message` carries the text of the question.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Tool the model selects when it wants to report back to the user.
// `intent` is the literal discriminator "done_for_now";
// `message` is the text to send to the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Picks the agent's next step from the conversation so far.
//   thread  - the conversation thread rendered as a string (the tests below pass JSON)
//   returns - one HumanTools or CalculatorTools variant; CalculatorTools is declared
//             outside this file
// The prompt sets a system role, switches to the user role, embeds the thread, and
// emits ctx.output_format (where BAML injects the output schema instructions).
// After that it asks the model to write a short bulleted plan first and only then
// the structured answer ("{...} // schema").
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        First, always plan out what to do next, for example:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// Greeting with no task in it: the expected next step is a request_more_information
// tool call. Note the fixture is a single JSON object, not an array of events.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Single-step arithmetic request: the expected next step is the multiply tool.
// Only the intent is asserted; the operands are not checked here.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Multi-step request whose three tool calls (multiply, divide, add) and their
// responses are already recorded in the thread; the last tool_response is 18.
// The expected next step is done_for_now with "18" somewhere in the message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// Clarification case: the second operand ("feee9ff10") is not a usable number,
// so the expected next step is a request_more_information tool call.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Resume-after-clarification case: the thread already holds the garbled request,
// the model's request_more_information call, and the human's reply ("lets try 12 instead").
// The expected next step is multiply with a == 3 (from the original request) and
// b == 12 (from the human's reply). Each assert label names the field it checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/07-context-window/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/07-context-window/README.md
================================================
# Chapter 7 - Customize Your Context Window

In this section, we'll explore how to customize the context window
of the agent.

this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)


update the agent to pretty-print the context window for the model


```diff
src/agent.ts
         // can change this to whatever custom serialization you want to do, XML, etc
         // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
-        return JSON.stringify(this.events);
+        return JSON.stringify(this.events, null, 2);
     }
 }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/07-agent.ts src/agent.ts

</details>

Test the formatting

    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

next, let's update the agent to use XML formatting instead

this is a very popular format for passing data to a model, among other things because of the token efficiency of XML.


```diff
src/agent.ts
 
     serializeForLLM() {
-        // can change this to whatever custom serialization you want to do, XML, etc
-        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
-        return JSON.stringify(this.events, null, 2);
+        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
     }
+
+    trimLeadingWhitespace(s: string) {
+        return s.replace(/^[ \t]+/gm, '');
+    }
+
+    serializeOneEvent(e: Event) {
+        return this.trimLeadingWhitespace(`
+            <${e.data?.intent || e.type}>
+            ${
+            typeof e.data !== 'object' ? e.data :
+            Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")}
+            </${e.data?.intent || e.type}>
+        `)
+    }
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/07b-agent.ts src/agent.ts

</details>

let's try it out


    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

let's update our tests to match the new thread format (this diff also rewords the planning instruction in the prompt)


```diff
baml_src/agent.baml
         {{ ctx.output_format }}
 
-        First, always plan out what to do next, for example:
+        Always think about what to do next first, like:
 
         - ...
   args {
     thread #"
-      {
-        "type": "user_input",
-        "data": "hello!"
-      }
+      <user_input>
+        hello!
+      </user_input>
     "#
   }
   args {
     thread #"
-      {
-        "type": "user_input",
-        "data": "can you multiply 3 and 4?"
-      }
+      <user_input>
+        can you multiply 3 and 4?
+      </user_input>
     "#
   }
   args {
     thread #"
-      [
-        {
-          "type": "user_input",
-          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
-        },
-        {
-          "type": "tool_call",
-          "data": {
-            "intent": "multiply",
-            "a": 3,
-            "b": 4
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 12
-        },
-        {
-          "type": "tool_call", 
-          "data": {
-            "intent": "divide",
-            "a": 12,
-            "b": 2
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 6
-        },
-        {
-          "type": "tool_call",
-          "data": {
-            "intent": "add", 
-            "a": 6,
-            "b": 12
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 18
-        }
-      ]
+         <user_input>
+    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
+    </user_input>
+
+
+    <multiply>
+    a: 3
+    b: 4
+    </multiply>
+
+
+    <tool_response>
+    12
+    </tool_response>
+
+
+    <divide>
+    a: 12
+    b: 2
+    </divide>
+
+
+    <tool_response>
+    6
+    </tool_response>
+
+
+    <add>
+    a: 6
+    b: 12
+    </add>
+
+
+    <tool_response>
+    18
+    </tool_response>
+
     "#
   }
   args {
     thread #"
-          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+          <user_input>
+          can you multiply 3 and fe1iiaff10
+          </user_input>
       "#
   }
   args {
     thread #"
-        [
-        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
-        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
-        {"type":"human_response","data":"lets try 12 instead"},
-      ]
+        <user_input>
+        can you multiply 3 and FD*(#F&& ?
+        </user_input>
+
+        <request_more_information>
+        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
+        </request_more_information>
+
+        <human_response>
+        lets try 12 instead
+        </human_response>
       "#
   }
   @@assert(intent, {{this.intent == "multiply"}})
 }
         
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/07c-agent.baml baml_src/agent.baml

</details>

check out the updated tests


    npx baml-cli test


================================================
FILE: workshops/2025-05/sections/07-context-window/baml_src/agent.baml
================================================
// human tools are async requests to a human
// HumanTools is either a ClarificationRequest (ask the human for more input)
// or a DoneForNow (report back on the work that was done).
type HumanTools = ClarificationRequest | DoneForNow

// Tool the model selects to ask the human for more information.
// `intent` is the literal discriminator "request_more_information";
// `message` carries the text of the question.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Tool the model selects when it wants to report back to the user.
// `intent` is the literal discriminator "done_for_now";
// `message` is the text to send to the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Picks the agent's next step from the conversation so far.
//   thread  - the conversation thread rendered as a string (the tests below pass JSON)
//   returns - one HumanTools or CalculatorTools variant; CalculatorTools is declared
//             outside this file
// The prompt sets a system role, switches to the user role, embeds the thread, and
// emits ctx.output_format (where BAML injects the output schema instructions).
// After that it asks the model to write a short bulleted plan first and only then
// the structured answer ("{...} // schema").
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        First, always plan out what to do next, for example:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// Greeting with no task in it: the expected next step is a request_more_information
// tool call. Note the fixture is a single JSON object, not an array of events.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Single-step arithmetic request: the expected next step is the multiply tool.
// Only the intent is asserted; the operands are not checked here.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Multi-step request whose three tool calls (multiply, divide, add) and their
// responses are already recorded in the thread; the last tool_response is 18.
// The expected next step is done_for_now with "18" somewhere in the message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// Clarification case: the second operand ("feee9ff10") is not a usable number,
// so the expected next step is a request_more_information tool call.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Resume-after-clarification case: the thread already holds the garbled request,
// the model's request_more_information call, and the human's reply ("lets try 12 instead").
// The expected next step is multiply with a == 3 (from the original request) and
// b == 12 (from the human's reply). Each assert label names the field it checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/07-context-window/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client for the "gpt-4o" model. The API key is read from the
// OPENAI_API_KEY environment variable. No retry policy is attached.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client for the "gpt-4o-mini" model, using the Exponential retry policy
// defined at the bottom of this file. The API key comes from OPENAI_API_KEY.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client pinned to "claude-3-5-sonnet-20241022". The API key is read
// from the ANTHROPIC_API_KEY environment variable. No retry policy is attached.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client pinned to "claude-3-haiku-20240307", using the Constant retry
// policy defined at the bottom of this file. The API key comes from ANTHROPIC_API_KEY.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two smaller models
// (CustomGPT4oMini and CustomHaiku), so both provider API keys are needed.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that walks the strategy list in order.
// NOTE(review): both entries are CustomGPT4oMini, so the "fallback" just tries the
// same client again - confirm whether a different second client was intended.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy used by CustomHaiku: max_retries 3 with a constant_delay
// strategy and delay_ms 200.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy used by CustomGPT4oMini: max_retries 2 with an exponential_backoff
// strategy (delay_ms 300, multiplier 1.5, max_delay_ms 10000).
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/07-context-window/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
//
// This generator emits an async TypeScript client one directory above baml_src/.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/07-context-window/baml_src/tool_calculator.baml
================================================
// Union of every calculator tool the model may select; the members are told
// apart by their literal `intent` field.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call requesting an addition: `intent` is the literal "add" and
// `a` / `b` are the two numeric operands (int or float).
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call requesting a subtraction: `intent` is the literal "subtract" and
// `a` / `b` are the two numeric operands (the agent code computes a - b).
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call requesting a multiplication: `intent` is the literal "multiply"
// and `a` / `b` are the two numeric operands (int or float).
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call requesting a division: `intent` is the literal "divide" and
// `a` / `b` are the two numeric operands (the agent code computes a / b).
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/07-context-window/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0"
    }
}


================================================
FILE: workshops/2025-05/sections/07-context-window/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * A single entry in a thread's history.
 *
 * `type` labels the kind of entry (this file pushes "tool_call" and
 * "tool_response" entries); `data` is the untyped payload for that entry.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered history of the events that make up one agent conversation.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Render the whole event history as the string that is handed to the LLM.
     * Currently this is the compact JSON encoding of `events`.
     */
    serializeForLLM(): string {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const { events } = this;
        return JSON.stringify(events);
    }
}

// Union of the four arithmetic tool-call shapes imported from the generated BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its outcome on the thread.
 *
 * @param nextStep the tool call chosen by the model; `intent` selects the operation
 * @param thread   the conversation to append the result to (mutated in place)
 * @returns the same `thread`, now ending in a "tool_response" event
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Shared tail of every branch: log the value, append it, hand the thread back.
    const record = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    const { a: lhs, b: rhs } = nextStep;
    switch (nextStep.intent) {
        case "add":
            return record(lhs + rhs);
        case "subtract":
            return record(lhs - rhs);
        case "multiply":
            return record(lhs * rhs);
        case "divide":
            return record(lhs / rhs);
    }
}

/**
 * Drive the agent: repeatedly ask the model for the next step, record it on the
 * thread, and execute calculator steps until the model hands control back to
 * the human ("done_for_now" or "request_more_information").
 *
 * @param thread conversation so far; events are appended to it as the loop runs
 * @returns the thread, whose last event is the human-facing tool call
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const step = await b.DetermineNextStep(serialized);
        console.log("nextStep", step);

        thread.events.push({ "type": "tool_call", "data": step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/07-context-window/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";


/**
 * Command-line entry point: treats the process arguments as the user's message,
 * runs the agent loop, keeps prompting on stdin while the agent asks for more
 * information, then prints the final message and exits.
 */
export async function cli() {
    // Everything after `node <script>` is the user's message
    const userArgs = process.argv.slice(2);

    if (userArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed the conversation with the joined arguments as the first event
    const thread = new Thread([{ type: "user_input", data: userArgs.join(" ") }]);

    // Run the agent loop and hand back the most recent event on the resulting thread
    const runAgent = async () => {
        const finished = await agentLoop(thread);
        return finished.events[finished.events.length - 1];
    };

    let lastEvent = await runAgent();
    while (lastEvent.data.intent === "request_more_information") {
        const reply = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: reply });
        lastEvent = await runAgent();
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Print `message` as a prompt and resolve with the line the user types on stdin.
 *
 * A fresh readline interface is created for each question and closed as soon as
 * the answer arrives, so repeated prompts do not pile up listeners on
 * process.stdin.
 *
 * @param message text shown to the user above the "> " prompt
 * @returns the user's answer
 */
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Release stdin before resolving; the next prompt creates its own interface.
            rl.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05/sections/07-context-window/src/index.ts
================================================
import { cli } from "./cli"

// Prints a fixed greeting; nothing in this file calls it.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Program entry point: run the CLI and wait for it to finish.
async function main(): Promise<void> {
    await cli()
}

// Report any rejection from main() on stderr.
main().catch((err) => console.error(err))

================================================
FILE: workshops/2025-05/sections/07-context-window/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/07-context-window/walkthrough/07-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * A single entry in a thread's history.
 *
 * `type` labels the kind of entry (this file pushes "tool_call" and
 * "tool_response" entries); `data` is the untyped payload for that entry.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered history of the events that make up one agent conversation.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Render the whole event history as the string that is handed to the LLM.
     * Currently this is the JSON encoding of `events`, indented by two spaces.
     */
    serializeForLLM(): string {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const { events } = this;
        const indent = 2;
        return JSON.stringify(events, null, indent);
    }
}

// Union of the four arithmetic tool-call shapes imported from the generated BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its outcome on the thread.
 *
 * @param nextStep the tool call chosen by the model; `intent` selects the operation
 * @param thread   the conversation to append the result to (mutated in place)
 * @returns the same `thread`, now ending in a "tool_response" event
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Shared tail of every branch: log the value, append it, hand the thread back.
    const record = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    const { a: lhs, b: rhs } = nextStep;
    switch (nextStep.intent) {
        case "add":
            return record(lhs + rhs);
        case "subtract":
            return record(lhs - rhs);
        case "multiply":
            return record(lhs * rhs);
        case "divide":
            return record(lhs / rhs);
    }
}

/**
 * Drive the agent: repeatedly ask the model for the next step, record it on the
 * thread, and execute calculator steps until the model hands control back to
 * the human ("done_for_now" or "request_more_information").
 *
 * @param thread conversation so far; events are appended to it as the loop runs
 * @returns the thread, whose last event is the human-facing tool call
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const step = await b.DetermineNextStep(serialized);
        console.log("nextStep", step);

        thread.events.push({ "type": "tool_call", "data": step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/07-context-window/walkthrough/07b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * A single entry in a thread's history.
 *
 * `type` labels the kind of entry (this file pushes "tool_call" and
 * "tool_response" entries); `data` is the untyped payload for that entry.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered history of the events in one agent conversation, with a compact
 * XML-like rendering used as the LLM prompt context.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Serialize every event in order and join the pieces with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Remove leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Render one event as a tagged block:
     *
     *     <tag>
     *     body
     *     </tag>
     *
     * The tag is `data.intent` when present, otherwise the event `type`.
     * Object payloads are rendered as one `key: value` line per key (skipping
     * `intent`); any other payload is rendered via string conversion.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null` is 'object', so null has to be ruled out explicitly
        // before calling Object.keys, which would otherwise throw.
        const isRecord = e.data !== null && typeof e.data === 'object';
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : `${e.data}`;
        return this.trimLeadingWhitespace(`\n<${tag}>\n${body}\n</${tag}>\n`);
    }
}

// Union of the four arithmetic tool-call shapes imported from the generated BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its outcome on the thread.
 *
 * @param nextStep the tool call chosen by the model; `intent` selects the operation
 * @param thread   the conversation to append the result to (mutated in place)
 * @returns the same `thread`, now ending in a "tool_response" event
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Shared tail of every branch: log the value, append it, hand the thread back.
    const record = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    const { a: lhs, b: rhs } = nextStep;
    switch (nextStep.intent) {
        case "add":
            return record(lhs + rhs);
        case "subtract":
            return record(lhs - rhs);
        case "multiply":
            return record(lhs * rhs);
        case "divide":
            return record(lhs / rhs);
    }
}

/**
 * Drive the agent: repeatedly ask the model for the next step, record it on the
 * thread, and execute calculator steps until the model hands control back to
 * the human ("done_for_now" or "request_more_information").
 *
 * @param thread conversation so far; events are appended to it as the loop runs
 * @returns the thread, whose last event is the human-facing tool call
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const step = await b.DetermineNextStep(serialized);
        console.log("nextStep", step);

        thread.events.push({ "type": "tool_call", "data": step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/07-context-window/walkthrough/07c-agent.baml
================================================
// Human tools are asynchronous requests to a human: either a request for
// clarification or a final "done for now" message.
type HumanTools = ClarificationRequest | DoneForNow

// Step the model selects when it needs more input from the user.
// `intent` is the literal "request_more_information"; `message` is the text
// of the question.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step the model selects when it has finished the requested work.
// `intent` is the literal "done_for_now"; `message` is what gets reported
// back to the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Ask the model which step to take next for a conversation.
//
// Input:  `thread` - the conversation so far, already serialized to a string.
// Output: one of the HumanTools or CalculatorTools variants.
//
// The prompt sends a short system message, then the thread and the output
// schema in the user turn, and asks the model to list its reasoning as bullet
// points before emitting the structured result.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting carries no task, so the expected step is a request for
// more information.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single, fully specified multiplication request should yield the
// "multiply" intent.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Multi-step request whose thread already contains every tool call and tool
// response; the model should finish with "done_for_now" and mention the final
// value 18 in its message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// One operand is unreadable, so the expected step is a request for more
// information rather than a calculation.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the human clarifies the garbled operand, the model should proceed with
// the multiplication using 3 and the corrected value 12. Each assertion label
// names the field it checks, so a failure report points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/08-api-endpoints/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/08-api-endpoints/README.md
================================================
# Chapter 8 - Adding API Endpoints

Add an Express server to expose the agent via HTTP.

For this section, we'll disable the BAML logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install Express and types

    npm install express && npm install --save-dev @types/express supertest

Add the server implementation

    cp ./walkthrough/08-server.ts src/server.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/08-server.ts
import express from 'express';
import { Thread, agentLoop } from '../src/agent';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);
    const result = await agentLoop(thread);
    res.json(result);
});

// GET /thread/:id - Get thread status 
app.get('/thread/:id', (req, res) => {
    // optional - add state
    res.status(404).json({ error: "Not implemented yet" });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };
```

</details>

Start the server

    npx tsx src/server.ts

Test with curl (in another terminal)

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you add 3 and 4"}'

You should get an answer from the agent which includes the
agentic trace, ending in a message like: 


    {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}


================================================
FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/agent.baml
================================================
// Human tools are asynchronous requests to a human: either a request for
// clarification or a final "done for now" message.
type HumanTools = ClarificationRequest | DoneForNow

// Step the model selects when it needs more input from the user.
// `intent` is the literal "request_more_information"; `message` is the text
// of the question.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step the model selects when it has finished the requested work.
// `intent` is the literal "done_for_now"; `message` is what gets reported
// back to the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Ask the model which step to take next for a conversation.
//
// Input:  `thread` - the conversation so far, already serialized to a string.
// Output: one of the HumanTools or CalculatorTools variants.
//
// The prompt sends a short system message, then the thread and the output
// schema in the user turn, and asks the model to list its reasoning as bullet
// points before emitting the structured result.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting carries no task, so the expected step is a request for
// more information.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single, fully specified multiplication request should yield the
// "multiply" intent.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Multi-step request whose thread already contains every tool call and tool
// response; the model should finish with "done_for_now" and mention the final
// value 18 in its message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// One operand is unreadable, so the expected step is a request for more
// information rather than a calculation.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the human clarifies the garbled operand, the model should proceed with
// the multiplication using 3 and the corrected value 12. Each assertion label
// names the field it checks, so a failure report points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client pinned to the "gpt-4o" model.
// The API key is read from the OPENAI_API_KEY environment variable.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client pinned to the "gpt-4o-mini" model.
// Failed requests are retried according to the `Exponential` retry policy
// declared later in this file. The API key comes from OPENAI_API_KEY.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client pinned to the "claude-3-5-sonnet-20241022" model.
// The API key is read from the ANTHROPIC_API_KEY environment variable.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client pinned to the "claude-3-haiku-20240307" model.
// Failed requests are retried according to the `Constant` retry policy
// declared later in this file. The API key comes from ANTHROPIC_API_KEY.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two smaller models
// (CustomGPT4oMini and CustomHaiku) using the round-robin provider.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that tries each listed client in order until one succeeds.
// The second entry is a different client (CustomGPT4o) so that a failure of
// CustomGPT4oMini actually falls through to an alternative rather than
// re-trying the very same client.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4o]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy: up to 3 retries, using a constant-delay strategy with
// delay_ms set to 200. Referenced by CustomHaiku.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy: up to 2 retries, using exponential backoff configured with
// delay_ms 300, multiplier 1.5 and max_delay_ms 10000.
// Referenced by CustomGPT4oMini.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
//
// This generator emits an async TypeScript client one directory above baml_src/.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/tool_calculator.baml
================================================
// Union of every calculator tool the model may select; the members are told
// apart by their literal `intent` field.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call requesting an addition: `intent` is the literal "add" and
// `a` / `b` are the two numeric operands (int or float).
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call requesting a subtraction: `intent` is the literal "subtract" and
// `a` / `b` are the two numeric operands (the agent code computes a - b).
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call requesting a multiplication: `intent` is the literal "multiply"
// and `a` / `b` are the two numeric operands (int or float).
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call requesting a division: `intent` is the literal "divide" and
// `a` / `b` are the two numeric operands (the agent code computes a / b).
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/08-api-endpoints/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0"
    }
}


================================================
FILE: workshops/2025-05/sections/08-api-endpoints/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * A single entry in a thread's history.
 *
 * `type` labels the kind of entry (this file pushes "tool_call" and
 * "tool_response" entries); `data` is the untyped payload for that entry.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered history of the events in one agent conversation, with a compact
 * XML-like rendering used as the LLM prompt context.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Serialize every event in order and join the pieces with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Remove leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Render one event as a tagged block:
     *
     *     <tag>
     *     body
     *     </tag>
     *
     * The tag is `data.intent` when present, otherwise the event `type`.
     * Object payloads are rendered as one `key: value` line per key (skipping
     * `intent`); any other payload is rendered via string conversion.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null` is 'object', so null has to be ruled out explicitly
        // before calling Object.keys, which would otherwise throw.
        const isRecord = e.data !== null && typeof e.data === 'object';
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : `${e.data}`;
        return this.trimLeadingWhitespace(`\n<${tag}>\n${body}\n</${tag}>\n`);
    }
}

// Union of the four arithmetic tool-call shapes imported from the generated BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its outcome on the thread.
 *
 * @param nextStep the tool call chosen by the model; `intent` selects the operation
 * @param thread   the conversation to append the result to (mutated in place)
 * @returns the same `thread`, now ending in a "tool_response" event
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Shared tail of every branch: log the value, append it, hand the thread back.
    const record = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    const { a: lhs, b: rhs } = nextStep;
    switch (nextStep.intent) {
        case "add":
            return record(lhs + rhs);
        case "subtract":
            return record(lhs - rhs);
        case "multiply":
            return record(lhs * rhs);
        case "divide":
            return record(lhs / rhs);
    }
}

/**
 * Drive the agent: repeatedly ask the model for the next step, record it on the
 * thread, and execute calculator steps until the model hands control back to
 * the human ("done_for_now" or "request_more_information").
 *
 * @param thread conversation so far; events are appended to it as the loop runs
 * @returns the thread, whose last event is the human-facing tool call
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const step = await b.DetermineNextStep(serialized);
        console.log("nextStep", step);

        thread.events.push({ "type": "tool_call", "data": step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/08-api-endpoints/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";


/**
 * Command-line entry point: treats the process arguments as the user's message,
 * runs the agent loop, keeps prompting on stdin while the agent asks for more
 * information, then prints the final message and exits.
 */
export async function cli() {
    // Everything after `node <script>` is the user's message
    const userArgs = process.argv.slice(2);

    if (userArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed the conversation with the joined arguments as the first event
    const thread = new Thread([{ type: "user_input", data: userArgs.join(" ") }]);

    // Run the agent loop and hand back the most recent event on the resulting thread
    const runAgent = async () => {
        const finished = await agentLoop(thread);
        return finished.events[finished.events.length - 1];
    };

    let lastEvent = await runAgent();
    while (lastEvent.data.intent === "request_more_information") {
        const reply = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: reply });
        lastEvent = await runAgent();
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Print `message` as a prompt and resolve with the line the user types on stdin.
 *
 * A fresh readline interface is created for each question and closed as soon as
 * the answer arrives, so repeated prompts do not pile up listeners on
 * process.stdin.
 *
 * @param message text shown to the user above the "> " prompt
 * @returns the user's answer
 */
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Release stdin before resolving; the next prompt creates its own interface.
            rl.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05/sections/08-api-endpoints/src/index.ts
================================================
import { cli } from "./cli"

// Prints a fixed greeting; nothing in this file calls it.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Program entry point: run the CLI and wait for it to finish.
async function main(): Promise<void> {
    await cli()
}

// Report any rejection from main() on stderr.
main().catch((err) => console.error(err))

================================================
FILE: workshops/2025-05/sections/08-api-endpoints/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/08-api-endpoints/walkthrough/08-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';

// Create the Express application.
const app = express();
// Parse JSON request bodies so the handlers below can read `req.body`.
app.use(express.json());
// Pretty-print JSON responses with a two-space indent.
app.set('json spaces', 2);

// POST /thread - Start new thread
//
// Expects a JSON body of the form {"message": "<text>"}. Responds with the
// finished thread as JSON, 400 when the message is missing or not a non-empty
// string, and 500 when the agent loop fails.
app.post('/thread', async (req, res) => {
    // Validate up front so a malformed body is reported to the caller instead
    // of being forwarded to the model as `undefined`.
    const message = req.body?.message;
    if (typeof message !== "string" || message.trim() === "") {
        res.status(400).json({ error: "Request body must include a non-empty string 'message'" });
        return;
    }

    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);
        const result = await agentLoop(thread);
        res.json(result);
    } catch (err) {
        // Without this, a rejected promise from the agent loop may never
        // produce a response, depending on the Express version in use.
        console.error("agentLoop failed", err);
        res.status(500).json({ error: "Failed to process thread" });
    }
});

// GET /thread/:id - Get thread status 
// Placeholder: no thread storage exists in this file, so every request is
// answered with a 404 and a "Not implemented yet" error body.
app.get('/thread/:id', (req, res) => {
    // optional - add state
    res.status(404).json({ error: "Not implemented yet" });
});

// Listen on the port named by the PORT environment variable, defaulting to 3000.
const port = process.env.PORT || 3000;
// NOTE: the server starts listening as soon as this module is loaded.
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Export the configured app so other modules can import it.
export { app };

================================================
FILE: workshops/2025-05/sections/09-state-management/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/09-state-management/README.md
================================================
# Chapter 9 - In-Memory State and Async Clarification

Add state management and async clarification support.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Add some simple in-memory state management for threads

    cp ./walkthrough/09-state.ts src/state.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/09-state.ts
import crypto from 'crypto';
import { Thread } from '../src/agent';


// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
export class ThreadStore {
    private threads: Map<string, Thread> = new Map();
    
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }
    
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }
    
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
```

</details>

update the server to use the state management

* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id 
* implement POST /thread/:id/response


```diff
src/server.ts
 import express from 'express';
 import { Thread, agentLoop } from '../src/agent';
+import { ThreadStore } from '../src/state';
 
 const app = express();
 app.set('json spaces', 2);
 
+const store = new ThreadStore();
+
 // POST /thread - Start new thread
 app.post('/thread', async (req, res) => {
         data: req.body.message
     }]);
-    const result = await agentLoop(thread);
-    res.json(result);
+    
+    const threadId = store.create(thread);
+    const newThread = await agentLoop(thread);
+    
+    store.update(threadId, newThread);
+
+    const lastEvent = newThread.events[newThread.events.length - 1];
+    // If we exited the loop, include the response URL so the client can
+    // push a new message onto the thread
+    lastEvent.data.response_url = `/thread/${threadId}/response`;
+
+    console.log("returning last event from endpoint", lastEvent);
+
+    res.json({ 
+        thread_id: threadId,
+        ...newThread 
+    });
 });
 
 app.get('/thread/:id', (req, res) => {
-    // optional - add state
-    res.status(404).json({ error: "Not implemented yet" });
+    const thread = store.get(req.params.id);
+    if (!thread) {
+        return res.status(404).json({ error: "Thread not found" });
+    }
+    res.json(thread);
 });
 
+// POST /thread/:id/response - Handle clarification response
+app.post('/thread/:id/response', async (req, res) => {
+    let thread = store.get(req.params.id);
+    if (!thread) {
+        return res.status(404).json({ error: "Thread not found" });
+    }
+    
+    thread.events.push({
+        type: "human_response",
+        data: req.body.message
+    });
+    
+    // loop until stop event
+    const newThread = await agentLoop(thread);
+    
+    store.update(req.params.id, newThread);
+
+    const lastEvent = newThread.events[newThread.events.length - 1];
+    lastEvent.data.response_url = `/thread/${req.params.id}/response`;
+
+    console.log("returning last event from endpoint", lastEvent);
+    
+    res.json(newThread);
+});
+
 const port = process.env.PORT || 3000;
 app.listen(port, () => {
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/09-server.ts src/server.ts

</details>

Start the server

    npx tsx src/server.ts

Test clarification flow

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you multiply 3 and xyz"}'


================================================
FILE: workshops/2025-05/sections/09-state-management/baml_src/agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

// Step the model picks when it needs more input from the human.
// `message` is the question text; src/cli.ts shows it to the user as the prompt.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step the model picks when it has finished the requested work.
// The agent loop in src/agent.ts returns the thread as soon as it sees this intent.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Asks the model to choose the next step for a conversation.
// Input:  `thread` - the event history rendered as text (src/agent.ts passes
//         Thread.serializeForLLM() here).
// Output: exactly one human-facing step (ClarificationRequest / DoneForNow)
//         or one calculator tool call.
// Everything between #" and "# below is prompt text sent to the model.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting contains no task, so the expected step is a request for more information.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A well-formed multiplication request should map directly to the multiply tool.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Replays a thread in which all three requested operations already have tool
// responses; the expected step is done_for_now with the final value 18 in the message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// One operand is gibberish, so the expected step is a clarification request, not a tool call.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the human answers the clarification with "12", the model should go
// ahead with multiply(3, 12). Each assertion label names the field it checks,
// so a failure report points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/09-state-management/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// Named LLM clients and retry policies. DetermineNextStep in this section's
// agent.baml uses the "openai/gpt-4o" shorthand rather than any of these names.

// OpenAI gpt-4o; the API key is read from OPENAI_API_KEY.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI gpt-4o-mini, retried with the Exponential policy defined below.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic Claude 3.5 Sonnet; the API key is read from ANTHROPIC_API_KEY.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic Claude 3 Haiku, retried with the Constant policy defined below.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// NOTE(review): both strategy entries name the same client, so the "fallback"
// target is CustomGPT4oMini again rather than a different model - confirm intended.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Up to 3 retries with a fixed 200 ms delay between attempts.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Up to 2 retries; delay starts at 300 ms, grows by 1.5x, capped at 10 s.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/09-state-management/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
// The TypeScript sources in this section import the result from "../baml_client".
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/09-state-management/baml_src/tool_calculator.baml
================================================
// Union of the arithmetic steps DetermineNextStep may return.
// Each tool carries a literal `intent` tag plus two numeric operands;
// src/agent.ts switches on `intent` to execute the operation.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// a + b
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// a - b
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// a * b
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// a / b
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/09-state-management/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "express": "^5.1.0",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/express": "^5.0.1",
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0",
        "supertest": "^7.1.0"
    }
}


================================================
FILE: workshops/2025-05/sections/09-state-management/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a thread's history.
export interface Event {
    // Category of the entry, e.g. "user_input", "tool_call", "tool_response", "human_response".
    type: string;
    // Payload: plain text or a number for inputs and tool results, or the
    // model's structured step object for tool calls.
    data: any;
}

// Ordered history of one agent conversation: user input, the steps chosen by
// the model, tool results and human replies.
export class Thread {
    events: Event[] = [];

    // Adopts the given array as the thread's history (no copy is made).
    constructor(events: Event[]) {
        this.events = events;
    }

    // Renders every event as an XML-like block and joins the blocks with
    // newlines; agentLoop passes this string to the model.
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    // Strips leading spaces/tabs from every line, so the indented template
    // literal in serializeOneEvent yields flush-left text.
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    // Renders one event as <tag> ... </tag>.
    // - tag:  the payload's `intent` when it has one, otherwise the event type.
    // - body: for object payloads, one "key: value" line per key except
    //         `intent`; for anything else, the value itself.
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null` is 'object', so null must be excluded explicitly;
        // otherwise Object.keys(null) throws for an event with null data.
        const isKeyed = e.data !== null && typeof e.data === 'object';
        const body = isKeyed
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }
}

// Union of the four generated calculator tool types that handleNextStep accepts.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

// Executes one calculator step and records the outcome on the thread.
// nextStep: the tool call chosen by the model (intent plus operands a and b).
// thread:   the conversation to append to; it is mutated and also returned.
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Shared tail of every arm: log the value, append it as a tool_response
    // event, and hand the same thread back.
    const record = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return record(nextStep.a + nextStep.b);
        case "subtract":
            return record(nextStep.a - nextStep.b);
        case "multiply":
            return record(nextStep.a * nextStep.b);
        case "divide":
            return record(nextStep.a / nextStep.b);
    }
}

// Drives the agent until it needs a human.
// Each pass asks the model for the next step, appends it to the thread as a
// tool_call event, and then either returns (done_for_now or
// request_more_information) or runs the calculator tool and goes around again.
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const decision = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", decision);

        thread.events.push({ "type": "tool_call", "data": decision });

        if (decision.intent === "done_for_now" || decision.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            decision.intent === "add" ||
            decision.intent === "subtract" ||
            decision.intent === "multiply" ||
            decision.intent === "divide"
        ) {
            thread = await handleNextStep(decision, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/09-state-management/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";


// Command-line driver: treats the arguments as the user's message, runs the
// agent, keeps answering clarification requests interactively, then prints
// the final message and exits.
export async function cli() {
    // Drop the node binary and script path; the rest is the user's message.
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed the conversation with the whole command line as a single user_input event.
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let outcome = await agentLoop(thread);
    let tail = outcome.events[outcome.events.length - 1];

    // While the agent is asking a question, relay it to the terminal and feed
    // the answer back in as a human_response event.
    while (tail.data.intent === "request_more_information") {
        const reply = await askHuman(tail.data.message);
        thread.events.push({ type: "human_response", data: reply });
        outcome = await agentLoop(thread);
        tail = outcome.events[outcome.events.length - 1];
    }

    // print the final result
    // optional - you could loop here too
    console.log(tail.data.message);
    process.exit(0);
}

// Shows `message` on the terminal and resolves with the line the user types.
// A fresh readline interface is opened per call and closed once the answer
// arrives: cli() calls this once per clarification round, and interfaces left
// open would all stay attached to the same stdin.
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05/sections/09-state-management/src/index.ts
================================================
import { cli } from "./cli"

// Prints a fixed greeting to stdout; nothing else in this file calls it.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Entry point: delegate to the command-line driver and wait for it to finish.
async function main() {
    const run = cli()
    await run
}

// Start the program; any rejection that escapes main() is printed to stderr.
main().catch(err => console.error(err))

================================================
FILE: workshops/2025-05/sections/09-state-management/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';

// Minimal HTTP front end for the agent: one endpoint that runs the agent loop
// to completion, plus a placeholder for thread lookup.
const app = express();
// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// POST /thread - Start new thread
// Reads `message` from the JSON body, seeds a thread with it as a user_input
// event, runs the agent loop, and responds with the resulting thread.
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);
    const result = await agentLoop(thread);
    res.json(result);
});

// GET /thread/:id - Get thread status
// Nothing is stored between requests here, so every lookup answers 404.
app.get('/thread/:id', (req, res) => {
    // optional - add state
    res.status(404).json({ error: "Not implemented yet" });
});

// Listen on $PORT when it is set, otherwise on 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Exported so other modules can import the configured app.
export { app };

================================================
FILE: workshops/2025-05/sections/09-state-management/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/09-state-management/walkthrough/09-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';

// HTTP front end for the agent with threads kept in an in-memory store, so a
// conversation can be paused on a question and resumed by a later request.
const app = express();
// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Thread storage; it lives in process memory only.
const store = new ThreadStore();

// POST /thread - Start new thread
// Body: { message }. Runs the agent until it stops for a human, stores the
// thread, and responds with the thread plus its id.
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // Send first, then return nothing: Express 5 typings require handlers
        // to return void / Promise<void>, not the Response object.
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});

// POST /thread/:id/response - Handle clarification response
// Body: { message }. Appends the human's reply to the stored thread and
// resumes the agent loop from there.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        // Same void-return rule as above.
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    thread.events.push({
        type: "human_response",
        data: req.body.message
    });

    // loop until stop event
    const newThread = await agentLoop(thread);

    store.update(req.params.id, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(newThread);
});

// Listen on $PORT when it is set, otherwise on 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Exported so other modules can import the configured app.
export { app };

================================================
FILE: workshops/2025-05/sections/09-state-management/walkthrough/09-state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';


// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
// Keeps threads in process memory, keyed by a generated id.
export class ThreadStore {
    // Backing map from thread id to the Thread object itself (not a copy).
    private threads = new Map<string, Thread>();

    // Registers a thread under a freshly generated UUID and returns that id.
    create(thread: Thread): string {
        const threadId = crypto.randomUUID();
        this.threads.set(threadId, thread);
        return threadId;
    }

    // Looks a thread up by id; yields undefined when the id is unknown.
    get(id: string): Thread | undefined {
        const found = this.threads.get(id);
        return found;
    }

    // Stores `thread` under `id`, replacing any previous entry.
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}

================================================
FILE: workshops/2025-05/sections/10-human-approval/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/10-human-approval/README.md
================================================
# Chapter 10 - Adding Human Approval

Add support for human approval of operations.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

update the server to handle human approvals

* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong


```diff
src/server.ts
 import express from 'express';
-import { Thread, agentLoop } from '../src/agent';
+import { Thread, agentLoop, handleNextStep } from '../src/agent';
 import { ThreadStore } from '../src/state';
 
 });
 
+
+type ApprovalPayload = {
+    type: "approval";
+    approved: boolean;
+    comment?: string;
+}
+
+type ResponsePayload = {
+    type: "response";
+    response: string;
+}
+
+type Payload = ApprovalPayload | ResponsePayload;
+
 // POST /thread/:id/response - Handle clarification response
 app.post('/thread/:id/response', async (req, res) => {
         return res.status(404).json({ error: "Thread not found" });
     }
+
+    const body: Payload = req.body;
+
+    let lastEvent = thread.events[thread.events.length - 1];
+
+    if (thread.awaitingHumanResponse() && body.type === 'response') {
+        thread.events.push({
+            type: "human_response",
+            data: body.response
+        });
+    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
+        // push feedback onto the thread
+        thread.events.push({
+            type: "tool_response",
+            data: `user denied the operation with feedback: "${body.comment}"`
+        });
+    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
+        // approved, run the tool, pushing results onto the thread
+        await handleNextStep(lastEvent.data, thread);
+    } else {
+        res.status(400).json({
+            error: "Invalid request: " + body.type,
+            awaitingHumanResponse: thread.awaitingHumanResponse(),
+            awaitingHumanApproval: thread.awaitingHumanApproval()
+        });
+        return;
+    }
+
     
-    thread.events.push({
-        type: "human_response",
-        data: req.body.message
-    });
-    
     // loop until stop event
     const newThread = await agentLoop(thread);
     store.update(req.params.id, newThread);
 
-    const lastEvent = newThread.events[newThread.events.length - 1];
+    lastEvent = newThread.events[newThread.events.length - 1];
     lastEvent.data.response_url = `/thread/${req.params.id}/response`;
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/10-server.ts src/server.ts

</details>

Add a few methods to the agent to handle approvals and responses

```diff
src/agent.ts
         `)
     }
+
+    awaitingHumanResponse(): boolean {
+        const lastEvent = this.events[this.events.length - 1];
+        return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent);
+    }
+
+    awaitingHumanApproval(): boolean {
+        const lastEvent = this.events[this.events.length - 1];
+        return lastEvent.data.intent === 'divide';
+    }
 }
 
                 // response to human, return the thread
                 return thread;
+            case "divide":
+                // divide is scary, return it for human approval
+                return thread;
             case "add":
             case "subtract":
             case "multiply":
-            case "divide":
                 thread = await handleNextStep(nextStep, thread);
         }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/10-agent.ts src/agent.ts

</details>

Start the server

    npx tsx src/server.ts

Test division with approval

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you divide 3 by 4"}'

You should see:

    {
      "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
      "events": [
        {
          "type": "user_input",
          "data": "can you divide 3 by 4"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 4,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        }
      ]
    }

reject the request with another curl call, changing the thread ID

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
      -H "Content-Type: application/json" \
      -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'

You should see: the last tool call is now `"intent":"divide","a":3,"b":5`

    {
      "events": [
        {
          "type": "user_input",
          "data": "can you divide 3 by 4"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 4,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        },
        {
          "type": "tool_response",
          "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 5,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        }
      ]
    }

now you can approve the operation

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
      -H "Content-Type: application/json" \
      -d '{"type": "approval", "approved": true}'

you should see the final message includes the tool response and final result!

    ...
    {
      "type": "tool_response",
      "data": 0.6
    },
    {
      "type": "done_for_now",
      "message": "I divided 3 by 5 and the result is 0.6. If you have any more operations or queries, feel free to ask!",
      "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
    }


================================================
FILE: workshops/2025-05/sections/10-human-approval/baml_src/agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

// Step the model picks when it needs more input from the human.
// `message` carries the question text to put to the human.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step the model picks when it has finished the requested work;
// `message` summarises what was done for the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Asks the model to choose the next step for a conversation.
// Input:  `thread` - the event history rendered as text.
// Output: exactly one human-facing step (ClarificationRequest / DoneForNow)
//         or one calculator tool call.
// Everything between #" and "# below is prompt text sent to the model.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting contains no task, so the expected step is a request for more information.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A well-formed multiplication request should map directly to the multiply tool.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Replays a thread in which all three requested operations already have tool
// responses; the expected step is done_for_now with the final value 18 in the message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// One operand is gibberish, so the expected step is a clarification request, not a tool call.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the human answers the clarification with "12", the model should go
// ahead with multiply(3, 12). Each assertion label names the field it checks,
// so a failure report points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/10-human-approval/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// Named LLM clients and retry policies. DetermineNextStep in this section's
// agent.baml uses the "openai/gpt-4o" shorthand rather than any of these names.

// OpenAI gpt-4o; the API key is read from OPENAI_API_KEY.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI gpt-4o-mini, retried with the Exponential policy defined below.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic Claude 3.5 Sonnet; the API key is read from ANTHROPIC_API_KEY.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic Claude 3 Haiku, retried with the Constant policy defined below.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// NOTE(review): both strategy entries name the same client, so the "fallback"
// target is CustomGPT4oMini again rather than a different model - confirm intended.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Up to 3 retries with a fixed 200 ms delay between attempts.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Up to 2 retries; delay starts at 300 ms, grows by 1.5x, capped at 10 s.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/10-human-approval/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/10-human-approval/baml_src/tool_calculator.baml
================================================
// Union of every arithmetic tool call declared in this file.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call for addition. `intent` is the literal "add" that identifies this
// variant of CalculatorTools; `a` and `b` are the operands (int or float).
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call for subtraction. `intent` is the literal "subtract" that identifies
// this variant of CalculatorTools; `a` and `b` are the operands (int or float).
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call for multiplication. `intent` is the literal "multiply" that identifies
// this variant of CalculatorTools; `a` and `b` are the operands (int or float).
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call for division. `intent` is the literal "divide" that identifies this
// variant of CalculatorTools; `a` and `b` are the operands (int or float).
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/10-human-approval/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "express": "^5.1.0",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/express": "^5.0.1",
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0",
        "supertest": "^7.1.0"
    }
}


================================================
FILE: workshops/2025-05/sections/10-human-approval/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a Thread's history.
export interface Event {
    // Kind of entry; values used in this file are "tool_call" and "tool_response".
    type: string
    // Payload of the entry: a tool-call object (with an `intent` field) for tool
    // calls, or a plain value such as a number or string for responses/inputs.
    data: any;
}

/**
 * Ordered history of one agent conversation, plus helpers that render it
 * into the text handed to the LLM.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins the pieces with "\n". */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Removes the run of spaces/tabs at the start of every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as an XML-like block.
     *
     * The tag is the payload's `intent` when it has one, otherwise the event
     * type. Object payloads are listed as one `key: value` line per key
     * (skipping `intent`); any other payload is printed as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null` is 'object', so null has to be excluded explicitly;
        // otherwise Object.keys(null) throws a TypeError.
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }
}

// Union of the four arithmetic tool-call types imported from the generated BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records the result on the thread.
 *
 * @param nextStep tool call to run; `intent` selects the operation applied to `a` and `b`
 * @param thread   thread to append to (mutated in place)
 * @returns the same thread, with a `tool_response` event holding the numeric result appended
 * @throws Error if `intent` is not one of "add", "subtract", "multiply", "divide"
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // The type system rules this out, but Event.data is `any`, so an
            // unexpected payload can still arrive at runtime. Fail loudly rather
            // than resolving to undefined, which would break the Promise<Thread> contract.
            throw new Error(`unknown calculator intent: ${String((nextStep as { intent?: unknown }).intent)}`);
    }

    // Logging and bookkeeping are identical for every operation.
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Drives the agent until it needs a human.
 *
 * Each pass asks the model for the next step, records it on the thread as a
 * `tool_call` event, and then either returns (for `done_for_now` and
 * `request_more_information`) or runs the calculator tool and goes around again.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const nextStep = await b.DetermineNextStep(serialized);
        console.log("nextStep", nextStep);

        thread.events.push({ "type": "tool_call", "data": nextStep });

        const intent = nextStep.intent;
        if (intent === "done_for_now" || intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply" ||
            nextStep.intent === "divide"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/10-human-approval/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";


/**
 * Command-line entry point: turns the process arguments into the first user
 * message, runs the agent loop, and keeps prompting on stdin for as long as
 * the agent asks for more information. Prints the final message and exits.
 */
export async function cli() {
    // Everything after `node <script>` is treated as the user's message.
    const words = process.argv.slice(2);
    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed the thread with the user's message as its only event.
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    // First pass through the agent loop.
    let outcome = await agentLoop(thread);
    let lastEvent = outcome.events[outcome.events.length - 1];

    // Keep answering clarification requests until the agent stops asking.
    while (lastEvent.data.intent === "request_more_information") {
        const answer = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: answer });
        outcome = await agentLoop(thread);
        lastEvent = outcome.events[outcome.events.length - 1];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Prints `message` on stdout, waits for one line on stdin, and resolves with it.
 *
 * @param message prompt shown to the user (followed by a newline and "> ")
 * @returns the line the user typed
 */
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Release stdin once the answer is in; otherwise every question
            // leaves another open interface attached to the same stream.
            rl.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05/sections/10-human-approval/src/index.ts
================================================
import { cli } from "./cli"

// Sample async function that logs a greeting; nothing in this file calls it.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Program entry point: hands control to the CLI.
async function main() {
    await cli()
}

// Log any failure and mark the run as failed; previously the error was
// printed but the process could still exit with status 0.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})

================================================
FILE: workshops/2025-05/sections/10-human-approval/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';

// Express application shared by every route below.
const app = express();
// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Thread storage used by the route handlers (ThreadStore from ../src/state).
const store = new ThreadStore();

// POST /thread - Start new thread
//
// Body: { "message": string }. Creates a thread seeded with that message, runs
// the agent loop until it stops, and responds with the thread plus its id.
app.post('/thread', async (req, res) => {
    // Reject missing/non-string messages up front: req.body may be undefined
    // when no JSON body was sent, and a non-string message would otherwise be
    // serialized into the prompt verbatim (e.g. as "undefined").
    const message = req.body?.message;
    if (typeof message !== 'string' || message.trim() === '') {
        res.status(400).json({ error: "Request body must include a non-empty string `message`" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    // Store the thread before running the loop so it has an id, then store the result.
    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status
//
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // Send, then return nothing: Express 5's handler typings expect a void
        // return, so `return res...json()` is avoided here.
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});

// POST /thread/:id/response - Handle clarification response
//
// Body: { "message": string }. Appends the message to the thread as a
// human_response event, resumes the agent loop, and responds with the thread.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // Send, then return nothing: Express 5's handler typings expect a void return.
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    // Reject missing/non-string messages: req.body may be undefined when no
    // JSON body was sent, and anything else would be fed to the model verbatim.
    const message = req.body?.message;
    if (typeof message !== 'string' || message.trim() === '') {
        res.status(400).json({ error: "Request body must include a non-empty string `message`" });
        return;
    }

    thread.events.push({
        type: "human_response",
        data: message
    });

    // loop until stop event
    const newThread = await agentLoop(thread);

    store.update(req.params.id, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(newThread);
});

// Port comes from the PORT environment variable, defaulting to 3000.
const port = process.env.PORT || 3000;
// NOTE(review): listening happens at module load, so importing `app` from
// this file also starts the server.
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Exported so other modules can reach the configured application.
export { app };

================================================
FILE: workshops/2025-05/sections/10-human-approval/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';


// In-memory registry of threads, keyed by a generated UUID.
// You can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
export class ThreadStore {
    private threads = new Map<string, Thread>();

    // Stores the thread under a freshly generated UUID and returns that id.
    create(thread: Thread): string {
        const threadId = crypto.randomUUID();
        this.threads.set(threadId, thread);
        return threadId;
    }

    // Looks up a thread; yields undefined when the id is unknown.
    get(id: string): Thread | undefined {
        const found = this.threads.get(id);
        return found;
    }

    // Stores `thread` under `id`, replacing whatever was there before.
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}

================================================
FILE: workshops/2025-05/sections/10-human-approval/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/10-human-approval/walkthrough/10-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a Thread's history.
export interface Event {
    // Kind of entry; values used in this file are "tool_call" and "tool_response".
    type: string
    // Payload of the entry: a tool-call object (with an `intent` field) for tool
    // calls, or a plain value such as a number or string for responses/inputs.
    data: any;
}

/**
 * Ordered history of one agent conversation, plus helpers that render it
 * for the LLM and report what kind of human input the thread is waiting on.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins the pieces with "\n". */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Removes the run of spaces/tabs at the start of every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as an XML-like block.
     *
     * The tag is the payload's `intent` when it has one, otherwise the event
     * type. Object payloads are listed as one `key: value` line per key
     * (skipping `intent`); any other payload is printed as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null` is 'object', so null has to be excluded explicitly;
        // otherwise Object.keys(null) throws a TypeError.
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /**
     * True when the latest event is a `request_more_information` or
     * `done_for_now` tool call. False for an empty thread or a payload
     * without an intent (optional chaining keeps those cases from throwing).
     */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /**
     * True when the latest event is a `divide` tool call, the one operation
     * that is held for human approval. False for an empty thread or a payload
     * without an intent.
     */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}

// Union of the four arithmetic tool-call types imported from the generated BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records the result on the thread.
 *
 * @param nextStep tool call to run; `intent` selects the operation applied to `a` and `b`
 * @param thread   thread to append to (mutated in place)
 * @returns the same thread, with a `tool_response` event holding the numeric result appended
 * @throws Error if `intent` is not one of "add", "subtract", "multiply", "divide"
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // The type system rules this out, but Event.data is `any`, so an
            // unexpected payload can still arrive at runtime. Fail loudly rather
            // than resolving to undefined, which would break the Promise<Thread> contract.
            throw new Error(`unknown calculator intent: ${String((nextStep as { intent?: unknown }).intent)}`);
    }

    // Logging and bookkeeping are identical for every operation.
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Drives the agent until it needs a human.
 *
 * Each pass asks the model for the next step and records it on the thread as
 * a `tool_call` event. The loop returns for `done_for_now`,
 * `request_more_information`, and `divide` (which is handed back for
 * approval); `add`, `subtract` and `multiply` are executed immediately and
 * the loop goes around again.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const nextStep = await b.DetermineNextStep(serialized);
        console.log("nextStep", nextStep);

        thread.events.push({ "type": "tool_call", "data": nextStep });

        if (nextStep.intent === "done_for_now" || nextStep.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (nextStep.intent === "divide") {
            // divide is scary, return it for human approval
            return thread;
        }

        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/10-human-approval/walkthrough/10-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';

// Express application shared by every route below.
const app = express();
// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Thread storage used by the route handlers (ThreadStore from ../src/state).
const store = new ThreadStore();

// POST /thread - Start new thread
//
// Body: { "message": string }. Creates a thread seeded with that message, runs
// the agent loop until it stops, and responds with the thread plus its id.
app.post('/thread', async (req, res) => {
    // Reject missing/non-string messages up front: req.body may be undefined
    // when no JSON body was sent, and a non-string message would otherwise be
    // serialized into the prompt verbatim (e.g. as "undefined").
    const message = req.body?.message;
    if (typeof message !== 'string' || message.trim() === '') {
        res.status(400).json({ error: "Request body must include a non-empty string `message`" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    // Store the thread before running the loop so it has an id, then store the result.
    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status
//
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // Send, then return nothing - the same shape as the 400 branch of the
        // response handler, and what Express 5's handler typings expect.
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});


// Body sent to approve or deny a pending tool call.
type ApprovalPayload = {
    type: "approval";
    // true runs the pending tool call; false rejects it
    approved: boolean;
    // optional feedback; the response handler includes it in the denial message
    comment?: string;
}

// Body sent to answer a question from the agent.
type ResponsePayload = {
    type: "response";
    // the human's reply text
    response: string;
}

// Any body accepted by POST /thread/:id/response, discriminated by `type`.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
//
// Accepts either a ResponsePayload (answer to a question) or an
// ApprovalPayload (approve/deny the pending tool call). The payload has to
// match what the thread is currently waiting for; otherwise the request is
// rejected with 400. On success the agent loop resumes and the updated
// thread is returned.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    // req.body may be undefined when no JSON body was sent, so every access
    // below goes through optional chaining.
    const body: Payload | undefined = req.body;

    // Evaluate the thread state once, before anything is pushed onto it.
    const awaitingResponse = thread.awaitingHumanResponse();
    const awaitingApproval = thread.awaitingHumanApproval();
    const pendingEvent = thread.events[thread.events.length - 1];

    if (awaitingResponse && body?.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (awaitingApproval && body?.type === 'approval' && !body.approved) {
        // push feedback onto the thread; `comment` is optional, so only quote
        // it when present instead of emitting the text "undefined"
        thread.events.push({
            type: "tool_response",
            data: body.comment
                ? `user denied the operation with feedback: "${body.comment}"`
                : "user denied the operation"
        });
    } else if (awaitingApproval && body?.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(pendingEvent.data, thread);
    } else {
        res.status(400).json({
            error: "Invalid request: " + body?.type,
            awaitingHumanResponse: awaitingResponse,
            awaitingHumanApproval: awaitingApproval
        });
        return;
    }

    // loop until stop event
    const newThread = await agentLoop(thread);

    store.update(req.params.id, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(newThread);
});

// Port comes from the PORT environment variable, defaulting to 3000.
const port = process.env.PORT || 3000;
// NOTE(review): listening happens at module load, so importing `app` from
// this file also starts the server.
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Exported so other modules can reach the configured application.
export { app };

================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/README.md
================================================
# Chapter 11 - Human Approvals over email

In this section, we'll add support for human approvals over email.

This will start a little bit contrived, just to get the concepts down - 

We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI

While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install HumanLayer

    npm install humanlayer

Update CLI to send `divide` and `request_more_information` to a human via email

```diff
src/cli.ts
 // cli.ts lets you invoke the agent loop from the command line
 
+import { humanlayer } from "humanlayer";
 import { agentLoop, Thread, Event } from "../src/agent";
 
-
-
 export async function cli() {
     // Get command line arguments, skipping the first two (node and script name)
 
     // Run the agent loop with the thread
-    const result = await agentLoop(thread);
-    let lastEvent = result.events.slice(-1)[0];
+    let newThread = await agentLoop(thread);
+    let lastEvent = newThread.events.slice(-1)[0];
 
-    while (lastEvent.data.intent === "request_more_information") {
-        const message = await askHuman(lastEvent.data.message);
-        thread.events.push({ type: "human_response", data: message });
-        const result = await agentLoop(thread);
-        lastEvent = result.events.slice(-1)[0];
+    while (lastEvent.data.intent !== "done_for_now") {
+        const responseEvent = await askHuman(lastEvent);
+        thread.events.push(responseEvent);
+        newThread = await agentLoop(thread);
+        lastEvent = newThread.events.slice(-1)[0];
     }
 
     // print the final result
     console.log(lastEvent.data.message);
     process.exit(0);
 }
 
-async function askHuman(message: string) {
+async function askHuman(lastEvent: Event): Promise<Event> {
+    if (process.env.HUMANLAYER_API_KEY) {
+        return await askHumanEmail(lastEvent);
+    } else {
+        return await askHumanCLI(lastEvent.data.message);
+    }
+}
+
+async function askHumanCLI(message: string): Promise<Event> {
     const readline = require('readline').createInterface({
         input: process.stdin,
     return new Promise((resolve) => {
         readline.question(`${message}\n> `, (answer: string) => {
-            resolve(answer);
+            resolve({ type: "human_response", data: answer });
         });
     });
 }
+
+export async function askHumanEmail(lastEvent: Event): Promise<Event> {
+    if (!process.env.HUMANLAYER_EMAIL) {
+        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+    }
+    const hl = humanlayer({ //reads apiKey from env
+        // name of this agent
+        runId: "12fa-cli-agent",
+        verbose: true,
+        contactChannel: {
+            // agent should request permission via email
+            email: {
+                address: process.env.HUMANLAYER_EMAIL,
+            }
+        }
+    }) 
+
+    if (lastEvent.data.intent === "divide") {
+        // fetch approval synchronously - this will block until reply
+        const response = await hl.fetchHumanApproval({
+            spec: {
+                fn: "divide",
+                kwargs: {
+                    a: lastEvent.data.a,
+                    b: lastEvent.data.b
+                }
+            }
+        })
+
+        if (response.approved) {
+            const result = lastEvent.data.a / lastEvent.data.b;
+            console.log("tool_response", result);
+            return {
+                "type": "tool_response",
+                "data": result
+            };
+        } else {
+            return {
+                "type": "tool_response",
+                "data": `user denied operation ${lastEvent.data.intent}
+                with feedback: ${response.comment}`
+            };
+        }
+    }
+    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
+}
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/11-cli.ts src/cli.ts

</details>

Run the CLI

    npx tsx src/index.ts 'can you divide 4 by 5'

The last lines of your program's output should mention the human review step

    nextStep { intent: 'divide', a: 4, b: 5 }
    HumanLayer: Requested human approval from HumanLayer cloud

go ahead and respond to the email with some feedback:

![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true)


you should get another email with an updated attempt based on your feedback!

You can go ahead and approve this one:

![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true)


and your final output will look like

    nextStep {
      intent: 'done_for_now',
      message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
    }
    The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!

Let's implement the `request_more_information` flow as well


```diff
src/cli.ts
     }) 
 
+    if (lastEvent.data.intent === "request_more_information") {
+        // fetch response synchronously - this will block until reply
+        const response = await hl.fetchHumanResponse({
+            spec: {
+                msg: lastEvent.data.message
+            }
+        })
+        return {
+            "type": "tool_response",
+            "data": response
+        }
+    }
+    
     if (lastEvent.data.intent === "divide") {
         // fetch approval synchronously - this will block until reply
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/11b-cli.ts src/cli.ts

</details>

Let's test the `request_more_information` flow by asking for a calculation
with garbled input:


    npx tsx src/index.ts 'can you multiply 4 and xyz'

You should get an email with a request for clarification

    Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?

you can respond with something like

    use 8 instead of xyz

you should see a final result on the CLI like

    I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.

As a final step, let's explore using a custom HTML template for the email


```diff
src/cli.ts
             email: {
                 address: process.env.HUMANLAYER_EMAIL,
+                // custom email body - jinja
+                template: `{% if type == 'request_more_information' %}
+{{ event.spec.msg }}
+{% else %}
+agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
+with args: {{event.spec.kwargs}}
+<br><br>
+reply to this email to approve
+{% endif %}`
             }
         }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/11c-cli.ts src/cli.ts

</details>

first try with divide:


    npx tsx src/index.ts 'can you divide 4 by 5'

you should see a slightly different email with the custom template

![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true)

feel free to run with the flow and then you can try updating the template to your liking

(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)

try triggering "request_more_information" as well!


That's it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval.


================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/agent.baml
================================================
// human tools are async requests to a human:
// either a request for clarification or a final "done" message
type HumanTools = ClarificationRequest | DoneForNow

// Step in which the model asks the human a question instead of acting.
// `intent` is the literal that identifies this variant; `message` carries
// the question text.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step in which the model reports back to the user.
// `intent` is the literal that identifies this variant; `message` is the
// text describing the work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Asks the model what to do next for a conversation.
//
// Input:  `thread` - the conversation so far, already serialized to text.
// Output: one of the HumanTools (ask for clarification / report done) or one
//         of the CalculatorTools (add / subtract / multiply / divide).
//
// The client is given as the "openai/gpt-4o" string rather than by the name
// of a client<llm> block.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting with no task attached: the expected next step is to ask
// the user for more information.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single, fully specified multiplication request: the expected next step
// is the multiply tool.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// A three-step calculation whose tool calls and responses are all already on
// the thread: the expected next step is done_for_now, with the final value
// (18) mentioned in the message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// A multiplication request where one operand is garbage: the expected next
// step is to ask for clarification rather than call a tool.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A garbled request followed by a clarification from the human: the expected
// next step is multiply, using the original operand (3) and the clarified
// one (12).
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // Each label now names the field it checks (they were swapped before), so a
  // failure report points at the right operand.
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client pinned to the "gpt-4o" model.
// The API key is read from the OPENAI_API_KEY environment variable.
// No retry_policy is attached to this client.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client pinned to the "gpt-4o-mini" model.
// Uses the Exponential retry policy defined at the bottom of this file.
// The API key is read from the OPENAI_API_KEY environment variable.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client pinned to the "claude-3-5-sonnet-20241022" model.
// The API key is read from the ANTHROPIC_API_KEY environment variable.
// No retry_policy is attached to this client.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client pinned to the "claude-3-haiku-20240307" model.
// Uses the Constant retry policy defined at the bottom of this file.
// The API key is read from the ANTHROPIC_API_KEY environment variable.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client built from the two smaller models declared above
// (CustomGPT4oMini and CustomHaiku) using the round-robin provider.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client using the fallback provider: requests go to the first
// client in `strategy`, and move on to the next entry only if that one fails.
// The list previously named CustomGPT4oMini twice, which meant a failure was
// "recovered" by calling the exact same client again; the second entry now
// points at CustomGPT4o so the fallback actually reaches a different model.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4o]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy referenced by CustomHaiku: at most 3 retries, using the
// constant_delay strategy with delay_ms set to 200.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy referenced by CustomGPT4oMini: at most 2 retries, using the
// exponential_backoff strategy (delay_ms 300, multiplier 1.5, max_delay_ms 10000).
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/generators.baml
================================================
// This helps you auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
//
// This generator emits a TypeScript client one directory above baml_src/.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/tool_calculator.baml
================================================
// Union of every arithmetic tool call declared in this file; part of the
// return type of DetermineNextStep in agent.baml.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call for addition. `intent` is the literal "add" that identifies this
// variant of CalculatorTools; `a` and `b` are the operands (int or float).
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call for subtraction. `intent` is the literal "subtract" that identifies
// this variant of CalculatorTools; `a` and `b` are the operands (int or float).
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call for multiplication. `intent` is the literal "multiply" that identifies
// this variant of CalculatorTools; `a` and `b` are the operands (int or float).
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call tagged with the literal intent "divide"; carries two numeric operands.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "express": "^5.1.0",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/express": "^5.0.1",
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0",
        "supertest": "^7.1.0"
    }
}


================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a Thread's event history.
export interface Event {
    // kind of event, e.g. "user_input", "tool_call", "tool_response", "human_response"
    type: string
    // payload: a tool-call object carrying an `intent`, or a primitive such as a string or number
    data: any;
}

/**
 * Ordered history of events for one conversation, plus helpers to render it
 * for the LLM and to inspect what the most recent event is waiting on.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Render every event as a pseudo-XML block and join the blocks with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strip leading spaces/tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Render one event as `<tag>...</tag>`, where the tag is the payload's
     * `intent` when present and the event type otherwise. Object payloads are
     * listed as `key: value` lines (without `intent`); anything else is
     * interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so null must be excluded explicitly before
        // calling Object.keys on the payload
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /**
     * True when the last event is a request_more_information or done_for_now
     * call. Returns false (instead of throwing) for an empty thread or a
     * payload without an intent.
     */
    awaitingHumanResponse(): boolean {
        const intent = this.events[this.events.length - 1]?.data?.intent;
        return ['request_more_information', 'done_for_now'].includes(intent);
    }

    /**
     * True when the last event is a divide call. Returns false (instead of
     * throwing) for an empty thread or a payload without an intent.
     */
    awaitingHumanApproval(): boolean {
        const intent = this.events[this.events.length - 1]?.data?.intent;
        return intent === 'divide';
    }
}

// Any of the four arithmetic tool calls; handleNextStep switches on `intent` to tell them apart.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its outcome on the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation on `a` and `b`
 * @param thread   the thread that receives the resulting tool_response event
 * @returns the same thread instance, after the result has been appended
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Log the computed value, append it as a tool_response event, hand the thread back.
    const record = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return record(nextStep.a + nextStep.b);
        case "subtract":
            return record(nextStep.a - nextStep.b);
        case "multiply":
            return record(nextStep.a * nextStep.b);
        case "divide":
            return record(nextStep.a / nextStep.b);
    }
}

/**
 * Repeatedly ask the model for the next step and act on it until a step needs
 * a human.
 *
 * Every step is appended to the thread as a tool_call event. The loop returns
 * on done_for_now, request_more_information and divide (divide is handed back
 * for human approval); add, subtract and multiply are executed right away and
 * the loop continues.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({ "type": "tool_call", "data": nextStep });

        // response to human, or divide which is scary: return the thread
        if (
            nextStep.intent === "done_for_now" ||
            nextStep.intent === "request_more_information" ||
            nextStep.intent === "divide"
        ) {
            return thread;
        }

        // safe operations run without asking anyone
        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";


/**
 * Command-line entry point: treat the arguments as one user message, run the
 * agent loop, and keep prompting on the terminal while the agent asks for
 * more information. Prints the last event's message and exits with status 0;
 * exits with status 1 when no message was given.
 */
export async function cli() {
    // drop the node binary and the script path
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole argument list becomes the first user_input event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let latest = (await agentLoop(thread)).events.slice(-1)[0];

    while (latest.data.intent === "request_more_information") {
        const answer = await askHuman(latest.data.message);
        thread.events.push({ type: "human_response", data: answer });
        latest = (await agentLoop(thread)).events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(latest.data.message);
    process.exit(0);
}

/**
 * Print `message` on the terminal and resolve with the line the user types.
 *
 * The readline interface is closed as soon as the answer arrives, so repeated
 * prompts do not pile up interfaces listening on stdin.
 */
async function askHuman(message: string) {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/index.ts
================================================
import { cli } from "./cli"

// Greeting helper; nothing in this file calls it.
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Program entry point: hand control to the command-line interface.
async function main(): Promise<void> {
    await cli()
}

// Report any rejection from main on stderr.
main().catch((err) => console.error(err))

================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';

// Express application with JSON body parsing enabled.
const app = express();
app.use(express.json());
// 'json spaces' set to 2 so res.json output is indented
app.set('json spaces', 2);

// thread storage used by the route handlers in this file
const store = new ThreadStore();

// POST /thread - Start new thread
// Body: { message } - the initial user input. Responds 400 when it is missing;
// otherwise runs the agent loop until it pauses and returns the thread so far
// together with its id.
app.post('/thread', async (req, res) => {
    // req.body may be undefined when the request carried no JSON body, so
    // guard before dereferencing it
    const message = req.body?.message;
    if (message === undefined || message === null) {
        res.status(400).json({ error: "Missing required field: message" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);
    
    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);
    
    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({ 
        thread_id: threadId,
        ...newThread 
    });
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        return res.json(found);
    }
    return res.status(404).json({ error: "Thread not found" });
});


// Body sent to approve or deny a pending tool call; `comment` is optional feedback.
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

// Body carrying a free-text reply from the human.
type ResponsePayload = {
    type: "response";
    response: string;
}

// Either body shape; the `type` field tells them apart.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text reply (when the thread awaits a human response)
// or an approval decision (when the thread awaits approval), records it on
// the thread and resumes the agent loop. Anything else is rejected with 400.
app.post('/thread/:id/response', async (req, res) => {
    const threadId = req.params.id;
    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    // the event the human is reacting to
    const pendingEvent = thread.events[thread.events.length - 1];
    const wantsReply = thread.awaitingHumanResponse();
    const wantsApproval = thread.awaitingHumanApproval();

    if (wantsReply && body.type === 'response') {
        thread.events.push({ type: "human_response", data: body.response });
    } else if (wantsApproval && body.type === 'approval') {
        if (body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(pendingEvent.data, thread);
        } else {
            // push feedback onto the thread
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment}"`
            });
        }
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: wantsReply,
            awaitingHumanApproval: wantsApproval
        });
        return;
    }

    // loop until stop event
    const resumed = await agentLoop(thread);
    store.update(threadId, resumed);

    const newest = resumed.events[resumed.events.length - 1];
    newest.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", newest);

    res.json(resumed);
});

// PORT from the environment wins; otherwise fall back to 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';


// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory registry of threads keyed by a generated UUID.
 */
export class ThreadStore {
    private threads = new Map<string, Thread>();

    /** Register `thread` under a fresh random UUID and return that id. */
    create(thread: Thread): string {
        const freshId = crypto.randomUUID();
        this.update(freshId, thread);
        return freshId;
    }

    /** Look up a thread; yields undefined when the id is unknown. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Store `thread` under `id`, replacing whatever was there before. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}

================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/walkthrough/11-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: treat the arguments as one user message, then
 * alternate between the agent loop and a human until the agent reports
 * done_for_now. Prints the final message and exits with status 0; exits with
 * status 1 when no message was given.
 */
export async function cli() {
    // drop the node binary and the script path
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole argument list becomes the first user_input event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    // run the agent loop and report the event it stopped on
    const runUntilPause = async (): Promise<Event> => (await agentLoop(thread)).events.slice(-1)[0];

    let pending = await runUntilPause();
    while (pending.data.intent !== "done_for_now") {
        const reply = await askHuman(pending);
        thread.events.push(reply);
        pending = await runUntilPause();
    }

    // print the final result
    // optional - you could loop here too 
    console.log(pending.data.message);
    process.exit(0);
}

// Route the question to a human: over email when HUMANLAYER_API_KEY is set,
// otherwise through a terminal prompt showing the event's message.
async function askHuman(lastEvent: Event): Promise<Event> {
    const useEmail = Boolean(process.env.HUMANLAYER_API_KEY);
    return useEmail
        ? askHumanEmail(lastEvent)
        : askHumanCLI(lastEvent.data.message);
}

/**
 * Print `message` on the terminal and resolve with a human_response event
 * holding the line the user types.
 *
 * The readline interface is closed as soon as the answer arrives, so repeated
 * prompts do not pile up interfaces listening on stdin.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Ask a human over email, via HumanLayer, to approve a divide call.
 *
 * Resolves with a tool_response event: the quotient when approved, otherwise
 * a denial message carrying the reviewer's comment. Throws when
 * HUMANLAYER_EMAIL is unset or when the event is not a divide call.
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    const address = process.env.HUMANLAYER_EMAIL;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        // agent should request permission via email
        contactChannel: { email: { address } }
    })

    const call = lastEvent.data;
    if (call.intent !== "divide") {
        throw new Error(`unknown tool: ${lastEvent.data.intent}`)
    }

    // fetch approval synchronously - this will block until reply
    const response = await hl.fetchHumanApproval({
        spec: { fn: "divide", kwargs: { a: call.a, b: call.b } }
    })

    if (!response.approved) {
        return {
            "type": "tool_response",
            "data": `user denied operation ${lastEvent.data.intent}
                with feedback: ${response.comment}`
        };
    }

    const result = call.a / call.b;
    console.log("tool_response", result);
    return { "type": "tool_response", "data": result };
}

================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/walkthrough/11b-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: treat the arguments as one user message, then
 * alternate between the agent loop and a human until the agent reports
 * done_for_now. Prints the final message and exits with status 0; exits with
 * status 1 when no message was given.
 */
export async function cli() {
    // drop the node binary and the script path
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole argument list becomes the first user_input event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    // run the agent loop and report the event it stopped on
    const runUntilPause = async (): Promise<Event> => (await agentLoop(thread)).events.slice(-1)[0];

    let pending = await runUntilPause();
    while (pending.data.intent !== "done_for_now") {
        const reply = await askHuman(pending);
        thread.events.push(reply);
        pending = await runUntilPause();
    }

    // print the final result
    // optional - you could loop here too 
    console.log(pending.data.message);
    process.exit(0);
}

// Route the question to a human: over email when HUMANLAYER_API_KEY is set,
// otherwise through a terminal prompt showing the event's message.
async function askHuman(lastEvent: Event): Promise<Event> {
    const useEmail = Boolean(process.env.HUMANLAYER_API_KEY);
    return useEmail
        ? askHumanEmail(lastEvent)
        : askHumanCLI(lastEvent.data.message);
}

/**
 * Print `message` on the terminal and resolve with a human_response event
 * holding the line the user types.
 *
 * The readline interface is closed as soon as the answer arrives, so repeated
 * prompts do not pile up interfaces listening on stdin.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contact a human over email via HumanLayer and turn the reply into an event.
 *
 * - request_more_information: sends the agent's message and resolves with a
 *   human_response event holding the reply, the same event type the terminal
 *   prompt in this file produces for a human's answer.
 * - divide: asks for approval and resolves with a tool_response event holding
 *   either the quotient or a denial message with the reviewer's comment.
 *
 * Throws when HUMANLAYER_EMAIL is unset or when the event has any other intent.
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    }) 

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // a human's answer is recorded as human_response, matching askHumanCLI
        return {
            "type": "human_response",
            "data": response
        }
    }
    
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}
                with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/walkthrough/11c-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: treat the arguments as one user message, then
 * alternate between the agent loop and a human until the agent reports
 * done_for_now. Prints the final message and exits with status 0; exits with
 * status 1 when no message was given.
 */
export async function cli() {
    // drop the node binary and the script path
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole argument list becomes the first user_input event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    // run the agent loop and report the event it stopped on
    const runUntilPause = async (): Promise<Event> => (await agentLoop(thread)).events.slice(-1)[0];

    let pending = await runUntilPause();
    while (pending.data.intent !== "done_for_now") {
        const reply = await askHuman(pending);
        thread.events.push(reply);
        pending = await runUntilPause();
    }

    // print the final result
    // optional - you could loop here too 
    console.log(pending.data.message);
    process.exit(0);
}

// Route the question to a human: over email when HUMANLAYER_API_KEY is set,
// otherwise through a terminal prompt showing the event's message.
async function askHuman(lastEvent: Event): Promise<Event> {
    const useEmail = Boolean(process.env.HUMANLAYER_API_KEY);
    return useEmail
        ? askHumanEmail(lastEvent)
        : askHumanCLI(lastEvent.data.message);
}

/**
 * Print `message` on the terminal and resolve with a human_response event
 * holding the line the user types.
 *
 * The readline interface is closed as soon as the answer arrives, so repeated
 * prompts do not pile up interfaces listening on stdin.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contact a human over email via HumanLayer, using a custom email template,
 * and turn the reply into an event.
 *
 * - request_more_information: sends the agent's message and resolves with a
 *   human_response event holding the reply, the same event type the terminal
 *   prompt in this file produces for a human's answer.
 * - divide: asks for approval and resolves with a tool_response event holding
 *   either the quotient or a denial message with the reviewer's comment.
 *
 * Throws when HUMANLAYER_EMAIL is unset or when the event has any other intent.
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
<br><br>
reply to this email to approve
{% endif %}`
            }
        }
    }) 

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // a human's answer is recorded as human_response, matching askHumanCLI
        return {
            "type": "human_response",
            "data": response
        }
    }
    
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}
                with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/README.md
================================================
# Chapter XX - HumanLayer Webhook Integration

The previous sections used the humanlayer SDK in "synchronous mode" - that 
means every time we wait for human approval, we sit in a loop 
polling until the human response is received.

That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results. 


add code to initialize humanlayer in the server


```diff
src/server.ts
 import { Thread, agentLoop, handleNextStep } from '../src/agent';
 import { ThreadStore } from '../src/state';
+import { humanlayer } from 'humanlayer';
 
 const app = express();
 const store = new ThreadStore();
 
+const getHumanlayer = () => {
+    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
+    if (!HUMANLAYER_EMAIL) {
+        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+    }
+
+    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
+    if (!HUMANLAYER_API_KEY) {
+        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
+    }
+    return humanlayer({
+        runId: `12fa-agent`,
+        contactChannel: {
+            email: { address: HUMANLAYER_EMAIL }
+        }
+    });
+}
+
 // POST /thread - Start new thread
 app.post('/thread', async (req, res) => {
     
     // loop until stop event
-    const newThread = await agentLoop(thread);
+    const result = await agentLoop(thread);
 
-    store.update(req.params.id, newThread);
+    store.update(req.params.id, result);
 
-    lastEvent = newThread.events[newThread.events.length - 1];
+    lastEvent = result.events[result.events.length - 1];
     lastEvent.data.response_url = `/thread/${req.params.id}/response`;
 
     console.log("returning last event from endpoint", lastEvent);
     
-    res.json(newThread);
+    res.json(result);
 });
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/12-1-server-init.ts src/server.ts

</details>

next, let's update the /thread endpoint to 
  
1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls


Update the server to be able to handle `request_more_information` responses

- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response


```diff
src/server.ts
-import express from 'express';
+import express, { Request, Response } from 'express';
 import { Thread, agentLoop, handleNextStep } from '../src/agent';
 import { ThreadStore } from '../src/state';
-import { humanlayer } from 'humanlayer';
+import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
 
 const app = express();
     });
 }
-
 // POST /thread - Start new thread
-app.post('/thread', async (req, res) => {
+app.post('/thread', async (req: Request, res: Response) => {
     const thread = new Thread([{
         type: "user_input",
     }]);
     
-    const threadId = store.create(thread);
-    const newThread = await agentLoop(thread);
-    
-    store.update(threadId, newThread);
+    // run agent loop asynchronously, return immediately
+    Promise.resolve().then(async () => {
+        const threadId = store.create(thread);
+        const newThread = await agentLoop(thread);
+        
+        store.update(threadId, newThread);
 
-    const lastEvent = newThread.events[newThread.events.length - 1];
-    // If we exited the loop, include the response URL so the client can
-    // push a new message onto the thread
-    lastEvent.data.response_url = `/thread/${threadId}/response`;
+        const lastEvent = newThread.events[newThread.events.length - 1];
 
-    console.log("returning last event from endpoint", lastEvent);
-
-    res.json({ 
-        thread_id: threadId,
-        ...newThread 
+        if (thread.awaitingHumanResponse()) {
+            const hl = getHumanlayer();
+            // create a human contact - returns immediately
+            hl.createHumanContact({
+                spec: {
+                    msg: lastEvent.data.message,
+                    state: {
+                        thread_id: threadId,
+                    }
+                }
+            });
+        }
     });
+
+    res.json({ status: "processing" });
 });
 
 // GET /thread/:id - Get thread status
-app.get('/thread/:id', (req, res) => {
+app.get('/thread/:id', (req: Request, res: Response) => {
     const thread = store.get(req.params.id);
     if (!thread) {
 });
 
+type WebhookResponse = V1Beta2HumanContactCompleted;
 
-type ApprovalPayload = {
-    type: "approval";
-    approved: boolean;
-    comment?: string;
-}
+const handleHumanResponse = async (req: Request, res: Response) => {
 
-type ResponsePayload = {
-    type: "response";
-    response: string;
 }
 
-type Payload = ApprovalPayload | ResponsePayload;
+app.post('/webhook', async (req: Request, res: Response) => {
+    console.log("webhook response", req.body);
+    const response = req.body as WebhookResponse;
 
-// POST /thread/:id/response - Handle clarification response
-app.post('/thread/:id/response', async (req, res) => {
-    let thread = store.get(req.params.id);
+    // response is guaranteed to be set on a webhook
+    const humanResponse: string = response.event.status?.response as string;
+
+    const threadId = response.event.spec.state?.thread_id;
+    if (!threadId) {
+        return res.status(400).json({ error: "Thread ID not found" });
+    }
+
+    const thread = store.get(threadId);
     if (!thread) {
         return res.status(404).json({ error: "Thread not found" });
     }
 
-    const body: Payload = req.body;
-
-    let lastEvent = thread.events[thread.events.length - 1];
-
-    if (thread.awaitingHumanResponse() && body.type === 'response') {
-        thread.events.push({
-            type: "human_response",
-            data: body.response
-        });
-    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
-        // push feedback onto the thread
-        thread.events.push({
-            type: "tool_response",
-            data: `user denied the operation with feedback: "${body.comment}"`
-        });
-    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
-        // approved, run the tool, pushing results onto the thread
-        await handleNextStep(lastEvent.data, thread);
-    } else {
-        res.status(400).json({
-            error: "Invalid request: " + body.type,
-            awaitingHumanResponse: thread.awaitingHumanResponse(),
-            awaitingHumanApproval: thread.awaitingHumanApproval()
-        });
-        return;
+    if (!thread.awaitingHumanResponse()) {
+        return res.status(400).json({ error: "Thread is not awaiting human response" });
     }
 
-    
-    // loop until stop event
-    const result = await agentLoop(thread);
-
-    store.update(req.params.id, result);
-
-    lastEvent = result.events[result.events.length - 1];
-    lastEvent.data.response_url = `/thread/${req.params.id}/response`;
-
-    console.log("returning last event from endpoint", lastEvent);
-    
-    res.json(result);
 });
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/12a-server.ts src/server.ts

</details>

Start the server in another terminal

    npx tsx src/server.ts

now that the server is running, send a payload to the '/thread' endpoint


__ do the response step

__ now handle approvals for divide

__ now also handle done_for_now


================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/agent.baml
================================================
// human tools are async requests to a human
// Union of the two human-facing intents: ClarificationRequest and DoneForNow.
type HumanTools = ClarificationRequest | DoneForNow

// Step tagged with the literal intent "request_more_information"; `message`
// is the text of the request.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step tagged with the literal intent "done_for_now"; `message` reports the
// work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// DetermineNextStep takes the serialized thread (a single string) and asks the
// "openai/gpt-4o" client for the next step, which is parsed as either a
// HumanTools or a CalculatorTools value. The prompt embeds the thread verbatim
// and appends the generated output-format instructions.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting carries no task, so the expected next step is a
// clarification request rather than done_for_now.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single multiplication request should produce a "multiply" tool call.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Resumes from a thread in which multiply, divide and add have already been
// called and answered; the model should finish with done_for_now and mention
// the final value 18 in its message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// The second operand is not a number, so the model is expected to ask the
// human for clarification instead of calling a calculator tool.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Resumes after the human has answered a clarification request with "12";
// the model should now call multiply with a == 3 and b == 12.
// Each assert label names the field it checks, so a failure report points at
// the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client for the "gpt-4o" model; the API key is read from the
// OPENAI_API_KEY environment variable. No retry policy is attached.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client for "gpt-4o-mini", using the Exponential retry policy
// declared later in this file.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client for "claude-3-5-sonnet-20241022"; the API key is read from
// the ANTHROPIC_API_KEY environment variable. No retry policy is attached.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client for "claude-3-haiku-20240307", using the Constant retry
// policy declared later in this file.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two small models.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that tries the small model first and falls back to the
// larger OpenAI model. Listing the same client twice would only repeat the
// identical request, so the second entry is a distinct client.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4o]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy: up to 3 retries with a fixed 200 ms delay between attempts.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy: up to 2 retries with exponential backoff, configured with a
// 300 ms base delay, a 1.5 multiplier and a 10000 ms maximum delay.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/generators.baml
================================================
// This helps you auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/tool_calculator.baml
================================================
// CalculatorTools is the union of the four arithmetic tool calls the model may
// emit. Each tool is identified by its literal `intent` and carries two
// numeric operands, `a` and `b`.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// intent "add": operands a and b.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// intent "subtract": operands a and b.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// intent "multiply": operands a and b.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// intent "divide": operands a and b.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "express": "^5.1.0",
        "humanlayer": "^0.7.7",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/express": "^5.0.1",
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0",
        "supertest": "^7.1.0"
    }
}


================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * One entry in a thread's event log.
 *
 * `type` is a free-form label; this file pushes "tool_call" and
 * "tool_response" events. `data` is untyped: it holds either a scalar (for
 * example a numeric tool result) or an object such as a tool call carrying an
 * `intent` field.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Thread is the ordered event log for one agent conversation. It can render
 * itself into the tag-delimited text passed to the LLM and report whether the
 * most recent event is waiting on a human.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins them with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strips leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as `<tag> ... </tag>`, where the tag is the event's
     * `data.intent` if present and otherwise its `type`. Object data is
     * rendered as one `key: value` line per key (excluding `intent`); any
     * other data is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', but Object.keys(null) throws, so null is
        // rendered like any other scalar value.
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /**
     * True when the last event's intent is "request_more_information" or
     * "done_for_now". Returns false for an empty thread or an event without
     * an intent.
     */
    awaitingHumanResponse(): boolean {
        const intent = this.lastIntent();
        return intent !== undefined && ['request_more_information', 'done_for_now'].includes(intent);
    }

    /**
     * True when the last event's intent is "divide". Returns false for an
     * empty thread or an event without an intent.
     */
    awaitingHumanApproval(): boolean {
        return this.lastIntent() === 'divide';
    }

    /** Intent of the most recent event, or undefined if there is none. */
    private lastIntent(): string | undefined {
        // Optional chaining covers an empty event list as well as null or
        // undefined data on the last event.
        return this.events[this.events.length - 1]?.data?.intent;
    }
}

/** Union of the generated calculator tool-call types that handleNextStep can execute. */
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and appends its numeric result to the
 * thread as a "tool_response" event.
 *
 * @param nextStep the tool call to run; its `intent` selects the operation
 * @param thread   the thread to append to (mutated in place)
 * @returns the same thread instance that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Shared tail for every operation: log the value, record it, hand the thread back.
    const record = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return record(nextStep.a + nextStep.b);
        case "subtract":
            return record(nextStep.a - nextStep.b);
        case "multiply":
            return record(nextStep.a * nextStep.b);
        case "divide":
            return record(nextStep.a / nextStep.b);
    }
}

/**
 * Repeatedly asks the model for the next step, records each step on the
 * thread as a "tool_call" event, and runs add/subtract/multiply immediately.
 * Returns as soon as the model emits a step that needs a human:
 * "done_for_now", "request_more_information", or "divide" (which is held for
 * approval).
 *
 * @param thread the thread to advance (mutated in place)
 * @returns the thread once it is waiting on a human
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        thread.events.push({ "type": "tool_call", "data": step });

        // response to human, or a divide that is held for human approval
        if (
            step.intent === "done_for_now" ||
            step.intent === "request_more_information" ||
            step.intent === "divide"
        ) {
            return thread;
        }

        // Only these three run without approval; anything else falls through
        // to the next iteration untouched.
        if (step.intent === "add" || step.intent === "subtract" || step.intent === "multiply") {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: joins the command-line arguments into one user
 * message, runs the agent loop, and keeps asking the human for input until
 * the agent's last event is "done_for_now". Prints the final message and
 * exits the process (code 1 when no message was supplied, 0 on completion).
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script; the rest is the message
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed the thread with the user's message as its first event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let current = await agentLoop(thread);
    let latest = current.events[current.events.length - 1];

    // Every non-final stop needs a human answer before the loop can resume
    while (latest.data.intent !== "done_for_now") {
        const reply = await askHuman(latest);
        thread.events.push(reply);
        current = await agentLoop(thread);
        latest = current.events[current.events.length - 1];
    }

    // print the final result
    // optional - you could loop here too 
    console.log(latest.data.message);
    process.exit(0);
}

/**
 * Routes a pending agent event to a human: by email through askHumanEmail
 * when HUMANLAYER_API_KEY is set, otherwise as a terminal prompt showing the
 * event's message.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_API_KEY) {
        return await askHumanCLI(lastEvent.data.message);
    }
    return await askHumanEmail(lastEvent);
}

/**
 * Prompts on the terminal with `message` and resolves with the typed answer
 * wrapped in a "human_response" event.
 *
 * @param message text shown above the `> ` prompt
 * @returns a "human_response" event whose data is the line the user entered
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Close the interface once answered. cli() calls this once per
            // clarification, and an interface left open keeps its listeners
            // attached to stdin alongside the next one.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contacts a human by email through HumanLayer and blocks until they reply.
 *
 * - "request_more_information": sends the event's message and returns the
 *   reply as a "human_response" event, the same event type the terminal
 *   prompt path produces.
 * - "divide": requests approval; on approval performs the division and
 *   returns the result as a "tool_response", otherwise returns a
 *   "tool_response" describing the denial and the reviewer's comment.
 *
 * @param lastEvent the pending agent event (its data carries `intent` and the
 *                  intent-specific fields)
 * @returns the event to append to the thread
 * @throws Error if HUMANLAYER_EMAIL is unset, or the intent is neither of the
 *         two handled above
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
<br><br>
reply to this email to approve
{% endif %}`
            }
        }
    }) 

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // A clarification answer is human input, not the output of a tool.
        return {
            "type": "human_response",
            "data": response
        }
    }
    
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}
                with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/index.ts
================================================
import { cli } from "./cli"

/** Prints the hello-world greeting. Not called anywhere in this file. */
async function hello(): Promise<void> {
    console.log('hello, world!')
}

/** Program entry point: delegates to the CLI. */
async function main() {
    await cli()
}

// Log any rejection from main.
main().catch(console.error)

================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// POST /thread - Start new thread
// Expects a JSON body with a `message` field. Runs the agent loop until it
// stops for a human, then returns the thread together with its id. Responds
// 400 when no message was supplied.
app.post('/thread', async (req, res) => {
    // req.body is undefined when no JSON body was parsed, so guard before
    // reading the field; a missing message would otherwise start an LLM run
    // with "undefined" as the user input.
    const message = req.body?.message;
    if (message === undefined || message === null || message === '') {
        res.status(400).json({ error: "Invalid request: message is required" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);
    
    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);
    
    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({ 
        thread_id: threadId,
        ...newThread 
    });
});

// GET /thread/:id - Get thread status
// Returns the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});


// Body sent to approve or deny a pending tool call; `comment` is optional
// feedback that is included in the denial message.
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

// Body sent to answer a clarification request with free text.
type ResponsePayload = {
    type: "response";
    response: string;
}

// Either body accepted by POST /thread/:id/response, discriminated on `type`.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text response (when the thread awaits a human
// response) or an approval decision (when it awaits approval), records the
// outcome on the thread, resumes the agent loop and returns the updated
// thread. Responds 404 for an unknown thread and 400 when the payload does
// not match what the thread is waiting for.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    // req.body is undefined when no JSON body was parsed; fall back to an
    // empty object so a bodiless request reaches the 400 branch below instead
    // of throwing on `body.type`.
    const body: Payload = req.body ?? {};

    let lastEvent = thread.events[thread.events.length - 1];

    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
        // push feedback onto the thread
        thread.events.push({
            type: "tool_response",
            data: `user denied the operation with feedback: "${body.comment}"`
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(lastEvent.data, thread);
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    
    // loop until stop event
    const newThread = await agentLoop(thread);

    store.update(req.params.id, newThread);

    lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);
    
    res.json(newThread);
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';


// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory thread registry keyed by a random UUID.
 * Swap for redis, sqlite, postgres, etc. when persistence is needed.
 */
export class ThreadStore {
    private threads = new Map<string, Thread>();

    /** Stores `thread` under a freshly generated UUID and returns that id. */
    create(thread: Thread): string {
        const newId = crypto.randomUUID();
        this.threads.set(newId, thread);
        return newId;
    }

    /** Looks up a thread by id; undefined when the id is unknown. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Stores `thread` under `id`, replacing any previous entry. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}

================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/walkthrough/12-1-server-init.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// Builds a HumanLayer client that contacts the address in HUMANLAYER_EMAIL.
// Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is unset (checked in that order).
const getHumanlayer = () => {
    const requireEnv = (name: string): string => {
        const value = process.env[name];
        if (!value) {
            throw new Error(`missing or invalid parameters: ${name}`);
        }
        return value;
    };

    const address = requireEnv("HUMANLAYER_EMAIL");
    // The key is only checked for presence here; it is not passed explicitly.
    requireEnv("HUMANLAYER_API_KEY");

    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address }
        }
    });
}

// POST /thread - Start new thread
// Expects a JSON body with a `message` field. Runs the agent loop until it
// stops for a human, then returns the thread together with its id. Responds
// 400 when no message was supplied.
app.post('/thread', async (req, res) => {
    // req.body is undefined when no JSON body was parsed, so guard before
    // reading the field; a missing message would otherwise start an LLM run
    // with "undefined" as the user input.
    const message = req.body?.message;
    if (message === undefined || message === null || message === '') {
        res.status(400).json({ error: "Invalid request: message is required" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);
    
    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);
    
    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({ 
        thread_id: threadId,
        ...newThread 
    });
});

// GET /thread/:id - Get thread status
// Returns the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});


// Body sent to approve or deny a pending tool call; `comment` is optional
// feedback that is included in the denial message.
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

// Body sent to answer a clarification request with free text.
type ResponsePayload = {
    type: "response";
    response: string;
}

// Either body accepted by POST /thread/:id/response, discriminated on `type`.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text response (when the thread awaits a human
// response) or an approval decision (when it awaits approval), records the
// outcome on the thread, resumes the agent loop and returns the updated
// thread. Responds 404 for an unknown thread and 400 when the payload does
// not match what the thread is waiting for.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    // req.body is undefined when no JSON body was parsed; fall back to an
    // empty object so a bodiless request reaches the 400 branch below instead
    // of throwing on `body.type`.
    const body: Payload = req.body ?? {};

    let lastEvent = thread.events[thread.events.length - 1];

    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
        // push feedback onto the thread
        thread.events.push({
            type: "tool_response",
            data: `user denied the operation with feedback: "${body.comment}"`
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(lastEvent.data, thread);
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    
    // loop until stop event
    const result = await agentLoop(thread);

    store.update(req.params.id, result);

    lastEvent = result.events[result.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);
    
    res.json(result);
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/walkthrough/12a-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// Builds a HumanLayer client that contacts the address in HUMANLAYER_EMAIL.
// Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is unset (checked in that order).
const getHumanlayer = () => {
    const requireEnv = (name: string): string => {
        const value = process.env[name];
        if (!value) {
            throw new Error(`missing or invalid parameters: ${name}`);
        }
        return value;
    };

    const address = requireEnv("HUMANLAYER_EMAIL");
    // The key is only checked for presence here; it is not passed explicitly.
    requireEnv("HUMANLAYER_API_KEY");

    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address }
        }
    });
}
// POST /thread - Start new thread
// Registers the thread, replies immediately with its id, and runs the agent
// loop in the background. When the loop stops awaiting a human response, a
// HumanLayer contact carrying the thread id is created. Responds 400 when no
// message was supplied.
app.post('/thread', async (req: Request, res: Response) => {
    // req.body is undefined when no JSON body was parsed, so guard before
    // reading the field.
    const message = req.body?.message;
    if (message === undefined || message === null || message === '') {
        res.status(400).json({ error: "Invalid request: message is required" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    // Register up front so the id can be returned to the caller, who otherwise
    // has no way to query GET /thread/:id for this run.
    const threadId = store.create(thread);
    
    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);
        
        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (thread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact; awaited so a failure is reported by the
            // catch below rather than becoming an unhandled rejection
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // The HTTP response has already been sent, so the failure can only be logged.
        console.error(`background agent run failed for thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Returns the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});

// Shape the /webhook handler casts its request body to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// NOTE(review): empty placeholder with no body; nothing in this file calls it.
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - Receive a completed human contact
// Looks up the thread id carried in the contact's state, records the human's
// reply on that thread, acknowledges the webhook, and resumes the agent loop
// in the background. Responds 400 when the thread id is missing or the thread
// is not awaiting a human response, and 404 when the thread is unknown.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // Record the reply before acknowledging, so the stored thread reflects it.
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });
    store.update(threadId, thread);

    // Resume the agent in the background, mirroring POST /thread: if the loop
    // stops for a human again, open a new contact carrying the same thread id.
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        if (newThread.awaitingHumanResponse()) {
            const lastEvent = newThread.events[newThread.events.length - 1];
            await getHumanlayer().createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // The webhook has already been acknowledged, so the failure can only be logged.
        console.error(`background agent run failed for thread ${threadId}`, err);
    });

    // Always answer the webhook; without this the request never completes.
    res.json({ status: "ok" });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/sections/final/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/sections/final/README.md
================================================
# Chapter 0 - Hello World

Let's start with a basic TypeScript setup and a hello world program.

This guide is written in TypeScript (yes, a python version is coming soon)

There are many checkpoints between file edits in the workshop steps,
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.

To run this guide, you'll need a relatively recent version of nodejs and npm installed

You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine


    brew install node@20

You should see the node version

    node --version

Copy initial package.json

    cp ./walkthrough/00-package.json package.json

Install dependencies

    npm install

Copy tsconfig.json

    cp ./walkthrough/00-tsconfig.json tsconfig.json

add .gitignore

    cp ./walkthrough/00-.gitignore .gitignore

Create src folder

    mkdir -p src

Add a simple hello world index.ts

    cp ./walkthrough/00-index.ts src/index.ts

Run it to verify

    npx tsx src/index.ts

You should see:

    hello, world!


# Chapter 1 - CLI and Agent Loop

Now let's add BAML and create our first agent with a CLI interface.

First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.


    npm install @boundaryml/baml

Initialize BAML

    npx baml-cli init

Remove default resume.baml

    rm baml_src/resume.baml

Add our starter agent, a single baml prompt that we'll build on

    cp ./walkthrough/01-agent.baml baml_src/agent.baml

Generate BAML client code

    npx baml-cli generate

Enable BAML logging for this section

    export BAML_LOG=debug

Add the CLI interface

    cp ./walkthrough/01-cli.ts src/cli.ts

Update index.ts to use the CLI

    cp ./walkthrough/01-index.ts src/index.ts

Add the agent implementation

    cp ./walkthrough/01-agent.ts src/agent.ts

The BAML code is configured to use OPENAI_API_KEY by default

As you're testing, you can change the model / provider to something else
as you please

        client "openai/gpt-4o"

[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)

For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) 
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.

If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key.


    export OPENAI_API_KEY=...

Try it out

    npx tsx src/index.ts hello

you should see a familiar response from the model

    {
      intent: 'done_for_now',
      message: 'Hello! How can I assist you today?'
    }


# Chapter 2 - Add Calculator Tools

Let's add some calculator tools to our agent.

Let's start by adding a tool definition for the calculator

These are simple structured outputs that we'll ask the model to 
return as a "next step" in the agentic loop.


    cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml

Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps


    cp ./walkthrough/02-agent.baml baml_src/agent.baml

Generate updated BAML client

    npx baml-cli generate

Try out the calculator

    npx tsx src/index.ts 'can you add 3 and 4'

You should see a tool call to the calculator

    {
      intent: 'add',
      a: 3,
      b: 4
    }


# Chapter 3 - Process Tool Calls in a Loop

Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.

First, let's update the agent to handle the tool call


    cp ./walkthrough/03-agent.ts src/agent.ts

Now, let's try it out


    npx tsx src/index.ts 'can you add 3 and 4'

you should see the agent call the tool and then return the result

    {
      intent: 'done_for_now',
      message: 'The sum of 3 and 4 is 7.'
    }

For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output

    export BAML_LOG=off

Try a multi-step calculation

    npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'

you'll notice that tools like multiply and divide are not available

    npx tsx src/index.ts 'can you multiply 3 and 4'

next, let's add handlers for the rest of the calculator tools


    cp ./walkthrough/03b-agent.ts src/agent.ts

Test subtraction

    npx tsx src/index.ts 'can you subtract 3 from 4'

now, let's test the multiplication tool


    npx tsx src/index.ts 'can you multiply 3 and 4'

finally, let's test a more complex calculation with multiple operations


    npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'


# Chapter 4 - Add Tests to agent.baml

Let's add some tests to our BAML agent.

to start, leave the baml logs enabled

    export BAML_LOG=debug

next, let's add some tests to the agent

We'll start with a simple test that checks the agent's ability to handle
a basic calculation.


    cp ./walkthrough/04-agent.baml baml_src/agent.baml

Run the tests

    npx baml-cli test

now, let's improve the test with assertions!

Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.


    cp ./walkthrough/04b-agent.baml baml_src/agent.baml

Run the tests

    npx baml-cli test

as you add more tests, you can disable the logs to keep the output clean. 
You may want to turn them on as you iterate on specific tests.


    export BAML_LOG=off

now, let's add some more complex test cases,
where we resume from in the middle of an in-progress
agentic context window


    cp ./walkthrough/04c-agent.baml baml_src/agent.baml

let's try to run it


    npx baml-cli test


# Chapter 5 - Multiple Human Tools

In this section, we'll add support for multiple tools that serve to 
contact humans.


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

first, let's add a tool that can request clarification from a human 

this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.


    cp ./walkthrough/05-agent.baml baml_src/agent.baml

next, let's re-generate the client code

NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.


    npx baml-cli generate

now, let's update the agent to use the new tool


    cp ./walkthrough/05-agent.ts src/agent.ts

next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI


    cp ./walkthrough/05-cli.ts src/cli.ts

let's try it out


    npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '

next, let's add a test that checks the agent's ability to handle
a clarification request


    cp ./walkthrough/05b-agent.baml baml_src/agent.baml

and now we can run the tests again


    npx baml-cli test

you'll notice the new test passes, but the hello world test fails

This is because the agent's default behavior is to return "done_for_now"


    cp ./walkthrough/05c-agent.baml baml_src/agent.baml

Verify tests pass

    npx baml-cli test


# Chapter 6 - Customize Your Prompt with Reasoning

In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.

this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)

there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)


for this section, it will be helpful to leave the baml logs enabled

    export BAML_LOG=debug

update the agent prompt to include a reasoning step


    cp ./walkthrough/06-agent.baml baml_src/agent.baml

generate the updated client

    npx baml-cli generate

now, you can try it out with a simple prompt


    npx tsx src/index.ts 'can you multiply 3 and 4'

you should see output from the baml logs showing the reasoning steps

#### optional challenge 

add a field to your tool output format that includes the reasoning steps in the output!


# Chapter 7 - Customize Your Context Window

In this section, we'll explore how to customize the context window
of the agent.

this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)


update the agent to pretty-print the Context window for the model


    cp ./walkthrough/07-agent.ts src/agent.ts

Test the formatting

    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

next, let's update the agent to use XML formatting instead 

this is a very popular format for passing data to a model,

among other things, because of the token efficiency of XML.


    cp ./walkthrough/07b-agent.ts src/agent.ts

let's try it out


    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

let's update our tests to match the new output format


    cp ./walkthrough/07c-agent.baml baml_src/agent.baml

check out the updated tests


    npx baml-cli test


# Chapter 8 - Adding API Endpoints

Add an Express server to expose the agent via HTTP.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install Express and types

    npm install express && npm install --save-dev @types/express supertest

Add the server implementation

    cp ./walkthrough/08-server.ts src/server.ts

Start the server

    npx tsx src/server.ts

Test with curl (in another terminal)

    curl -X POST http://localhost:3000/thread \
  -H "Content-Type: application/json" \
  -d '{"message":"can you add 3 and 4"}'

You should get an answer from the agent which includes the
agentic trace, ending in a message like: 


    {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}


# Chapter 9 - In-Memory State and Async Clarification

Add state management and async clarification support.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Add some simple in-memory state management for threads

    cp ./walkthrough/09-state.ts src/state.ts

update the server to use the state management

* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id 
* implement POST /thread/:id/response


    cp ./walkthrough/09-server.ts src/server.ts

Start the server

    npx tsx src/server.ts

Test clarification flow

    curl -X POST http://localhost:3000/thread \
  -H "Content-Type: application/json" \
  -d '{"message":"can you multiply 3 and xyz"}'


# Chapter 10 - Adding Human Approval

Add support for human approval of operations.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

update the server to handle human approvals

* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong


    cp ./walkthrough/10-server.ts src/server.ts

Add a few methods to the agent to handle approvals and responses

    cp ./walkthrough/10-agent.ts src/agent.ts

Start the server

    npx tsx src/server.ts

Test division with approval

    curl -X POST http://localhost:3000/thread \
  -H "Content-Type: application/json" \
  -d '{"message":"can you divide 3 by 4"}'

You should see:

    {
  "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
  "events": [
    {
      "type": "user_input",
      "data": "can you divide 3 by 4"
    },
    {
      "type": "tool_call",
      "data": {
        "intent": "divide",
        "a": 3,
        "b": 4,
        "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
      }
    }
  ]
}

reject the request with another curl call, replacing `{thread_id}` with the thread ID from the previous response

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
  -H "Content-Type: application/json" \
  -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'

You should see: the last tool call is now `"intent":"divide","a":3,"b":5`

    {
  "events": [
    {
      "type": "user_input",
      "data": "can you divide 3 by 4"
    },
    {
      "type": "tool_call",
      "data": {
        "intent": "divide",
        "a": 3,
        "b": 4,
        "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
      }
    },
    {
      "type": "tool_response",
      "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
    },
    {
      "type": "tool_call",
      "data": {
        "intent": "divide",
        "a": 3,
        "b": 5,
        "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
      }
    }
  ]
}

now you can approve the operation

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
  -H "Content-Type: application/json" \
  -d '{"type": "approval", "approved": true}'

you should see the final message includes the tool response and final result!

    ...
{
  "type": "tool_response",
  "data": 0.6
},
{
  "type": "done_for_now",
  "message": "I divided 3 by 5 and the result is 0.6. If you have any more operations or queries, feel free to ask!",
  "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
}


# Chapter 11 - Human Approvals over email

in this section, we'll add support for human approvals over email.

This will start a little bit contrived, just to get the concepts down - 

We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI

While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install HumanLayer

    npm install humanlayer

Update CLI to send `divide` and `request_more_information` to a human via email

    cp ./walkthrough/11-cli.ts src/cli.ts

Run the CLI

    npx tsx src/index.ts 'can you divide 4 by 5'

The last line of your program's output should mention the human review step

    nextStep { intent: 'divide', a: 4, b: 5 }
HumanLayer: Requested human approval from HumanLayer cloud

go ahead and respond to the email with some feedback:

![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true)


you should get another email with an updated attempt based on your feedback!

You can go ahead and approve this one:

![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true)


and your final output will look like

    nextStep {
 intent: 'done_for_now',
 message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
}
The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!

let's implement the `request_more_information` flow as well


    cp ./walkthrough/11b-cli.ts src/cli.ts

let's test the `request_more_information` flow by asking for a calculation
with garbled input:


    npx tsx src/index.ts 'can you multiply 4 and xyz'

You should get an email with a request for clarification

    Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?

you can respond with something like

    use 8 instead of xyz

you should see a final result on the CLI like

    I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.

as a final step, let's explore using a custom html template for the email


    cp ./walkthrough/11c-cli.ts src/cli.ts

first try with divide:


    npx tsx src/index.ts 'can you divide 4 by 5'

you should see a slightly different email with the custom template

![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true)

feel free to run with the flow and then you can try updating the template to your liking

(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)

try triggering "request_more_information" as well!


that's it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval


# Chapter XX - HumanLayer Webhook Integration

the previous sections used the humanlayer SDK in "synchronous mode" - that
means every time we wait for human approval, we sit in a loop
polling until the human response is received.

That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results. 


add code to initialize humanlayer in the server


    cp ./walkthrough/12-1-server-init.ts src/server.ts

next, let's update the /thread endpoint to
  
1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls


Update the server to be able to handle `request_more_information` responses

- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response


    cp ./walkthrough/12a-server.ts src/server.ts

Start the server in another terminal

    npx tsx src/server.ts

now that the server is running, send a payload to the '/thread' endpoint


__ do the response step

__ now handle approvals for divide

__ now also handle done_for_now


================================================
FILE: workshops/2025-05/sections/final/baml_src/agent.baml
================================================
// human tools are async requests to a human
// Union of the two intents declared in this file that address a person
// rather than run a calculation; DetermineNextStep may return either one.
type HumanTools = ClarificationRequest | DoneForNow

// Output shape for asking the human a question.
// `intent` is the fixed literal "request_more_information";
// `message` is the free-text question.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Output shape for reporting back to the user.
// `intent` is the fixed literal "done_for_now"; `message` is the text
// shown to the user (see its @description below).
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Asks the model for the next step given the serialized thread so far.
//
// thread: the conversation/event history as a single string; it is
//         interpolated verbatim into the user message below.
// returns: one of the HumanTools or CalculatorTools variants.
//
// The prompt ends with a "think first" template so the model writes a few
// bullet points before emitting the structured answer.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting carries no task, so the expected next step is to ask
// the user for more information.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single, well-formed multiplication request should yield the
// "multiply" intent as the first step.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Resumes from the middle of a thread in which all three requested
// operations (multiply, divide, add) already have tool responses.
// The expected next step is "done_for_now" with the final value 18
// mentioned in the message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// One operand is garbled, so the expected next step is a clarification
// request rather than a calculator call.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Resumes after the human has answered a clarification request with
// "lets try 12 instead". The expected next step is a multiply call with
// the original operand (3) and the clarified operand (12).
//
// Each assertion label names the field it checks, so a failure report
// points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/sections/final/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client for the "gpt-4o" model; the API key is read from the
// OPENAI_API_KEY environment variable. No retry policy is attached.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client for the "gpt-4o-mini" model. Uses the Exponential
// retry policy declared later in this file.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client pinned to "claude-3-5-sonnet-20241022"; the API key is
// read from the ANTHROPIC_API_KEY environment variable.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client pinned to "claude-3-haiku-20240307". Uses the Constant
// retry policy declared later in this file.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two smaller models
// (one OpenAI, one Anthropic) declared above.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that tries the cheaper OpenAI model first and falls
// back to the larger one. The two entries must be different clients:
// listing the same client twice only repeats the identical request and
// duplicates what that client's own retry policy already does.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4o]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy: up to 3 retries with a fixed 200 ms delay between attempts.
// Referenced by CustomHaiku in this file.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy: up to 2 retries with exponential backoff, starting at
// 300 ms, growing by a factor of 1.5, capped at 10000 ms.
// Referenced by CustomGPT4oMini in this file.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05/sections/final/baml_src/generators.baml
================================================
// This helps you auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.85.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05/sections/final/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tool shapes declared below; each variant is
// distinguished by its literal `intent` value.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Request to add two numbers; `a` and `b` are the operands.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Request to subtract two numbers; `a` and `b` are the operands.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Request to multiply two numbers; `a` and `b` are the operands.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Request to divide two numbers; `a` and `b` are the operands.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/sections/final/package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
        "dev": "tsx src/index.ts",
        "build": "tsc"
    },
    "dependencies": {
        "baml": "^0.0.0",
        "express": "^5.1.0",
        "humanlayer": "^0.7.7",
        "tsx": "^4.15.0",
        "typescript": "^5.0.0"
    },
    "devDependencies": {
        "@types/express": "^5.0.1",
        "@types/node": "^20.0.0",
        "@typescript-eslint/eslint-plugin": "^6.0.0",
        "@typescript-eslint/parser": "^6.0.0",
        "eslint": "^8.0.0",
        "supertest": "^7.1.0"
    }
}


================================================
FILE: workshops/2025-05/sections/final/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * One entry in a thread's history.
 *
 * `type` labels the kind of entry (this file pushes "tool_call" and
 * "tool_response"); `data` is the untyped payload for that entry.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * An ordered list of events plus helpers to render it for the model and to
 * inspect what the most recent event is waiting on.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event as a tagged block and joins the blocks with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strips leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as `<tag>…</tag>`, where the tag is the payload's
     * `intent` when present and the event `type` otherwise. Object payloads
     * are rendered as one `key: value` line per key (excluding `intent`);
     * anything else is rendered as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null === 'object', so null has to be excluded explicitly
        // before handing the payload to Object.keys.
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /** Intent of the most recent event, or undefined when there is none. */
    private lastIntent(): unknown {
        // Optional chaining covers an empty thread and payloads that are
        // null/undefined, which would otherwise throw on property access.
        return this.events[this.events.length - 1]?.data?.intent;
    }

    /** True when the last event is a message addressed to the human. */
    awaitingHumanResponse(): boolean {
        const intent = this.lastIntent();
        return intent === 'request_more_information' || intent === 'done_for_now';
    }

    /** True when the last event is a `divide` call, which needs approval. */
    awaitingHumanApproval(): boolean {
        return this.lastIntent() === 'divide';
    }
}

/** Union of the four generated calculator tool types that handleNextStep accepts. */
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation and
 *                 `a` / `b` are its operands
 * @param thread   the thread to append the `tool_response` event to
 * @returns the same `thread` instance, after the result has been appended
 * @throws Error when `intent` is not one of the four calculator operations.
 *         The type system rules this out, but callers that pass untyped
 *         event data can still reach it at runtime; failing loudly avoids
 *         resolving to `undefined` where a Thread is promised.
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            throw new Error(`unknown tool: ${(nextStep as { intent?: unknown }).intent}`);
    }

    // Logging and bookkeeping are identical for every operation.
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Repeatedly asks the model for the next step and records it on the thread.
 *
 * Steps addressed to a human (`done_for_now`, `request_more_information`)
 * and `divide` (which requires approval) end the loop and hand the thread
 * back to the caller. `add`, `subtract` and `multiply` are executed
 * immediately and the loop continues.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        thread.events.push({ "type": "tool_call", "data": step });

        const intent = step.intent;

        // response to human, return the thread
        if (intent === "done_for_now" || intent === "request_more_information") {
            return thread;
        }

        // divide is scary, return it for human approval
        if (intent === "divide") {
            return thread;
        }

        if (step.intent === "add" || step.intent === "subtract" || step.intent === "multiply") {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/sections/final/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: treats all arguments as one user message, runs
 * the agent loop, and keeps asking a human for input until the agent
 * reports `done_for_now`. Prints the final message and exits with code 0
 * (or exits with code 1 when no message was supplied).
 */
export async function cli() {
    // argv[0] is node and argv[1] is the script; the rest is the message
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed the thread with the user's message
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let current = await agentLoop(thread);
    let tail = current.events.slice(-1)[0];

    // Anything other than done_for_now needs a human before continuing
    while (tail.data.intent !== "done_for_now") {
        const reply = await askHuman(tail);
        thread.events.push(reply);
        current = await agentLoop(thread);
        tail = current.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too 
    console.log(tail.data.message);
    process.exit(0);
}

/**
 * Routes a request for human input: over email when HUMANLAYER_API_KEY is
 * set in the environment, otherwise via an interactive terminal prompt
 * showing the event's `message`.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    const emailConfigured = Boolean(process.env.HUMANLAYER_API_KEY);
    return emailConfigured
        ? await askHumanEmail(lastEvent)
        : await askHumanCLI(lastEvent.data.message);
}

/**
 * Prompts on the terminal with `message` and resolves with the typed answer
 * wrapped in a `human_response` event.
 *
 * The readline interface is closed as soon as the answer arrives. This
 * function is called once per clarification, and leaving each interface
 * open would stack listeners on stdin and keep the stream held open.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contacts a human over email through HumanLayer and blocks until they reply.
 *
 * - `request_more_information`: sends the event's message and returns the
 *   reply as a `human_response` event. This is the same event type the
 *   terminal prompt produces, so the thread looks the same to the model
 *   regardless of which channel answered.
 * - `divide`: asks for approval. When approved, the division is performed
 *   here and returned as a `tool_response`; when denied, the denial and the
 *   reviewer's comment are returned as a `tool_response`.
 *
 * @throws Error when HUMANLAYER_EMAIL is not set, or when the event's intent
 *         is neither of the two handled above.
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
<br><br>
reply to this email to approve
{% endif %}`
            }
        }
    }) 

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        return {
            "type": "human_response",
            "data": response
        }
    }
    
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}
                with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-05/sections/final/src/index.ts
================================================
import { cli } from "./cli"

/** Prints a fixed greeting to stdout. */
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

/** Program entry point: hands control to the command-line interface. */
async function main(): Promise<void> {
    return cli()
}

main().catch(console.error)

================================================
FILE: workshops/2025-05/sections/final/src/server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

// Express application with JSON body parsing enabled.
const app = express();
app.use(express.json());
// Pretty-print JSON responses with a 2-space indent.
app.set('json spaces', 2);

// Single thread store shared by every route in this file.
const store = new ThreadStore();

/**
 * Builds a HumanLayer client that contacts the address in HUMANLAYER_EMAIL.
 *
 * Both HUMANLAYER_EMAIL and HUMANLAYER_API_KEY must be set; the email is
 * checked first. The API key is only validated here and is not passed to
 * the client explicitly.
 *
 * @throws Error naming the first missing environment variable
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL, HUMANLAYER_API_KEY } = process.env;

    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address: HUMANLAYER_EMAIL } };
    return humanlayer({ runId: `12fa-agent`, contactChannel });
}
// POST /thread - Start new thread
//
// Expects a JSON body of the form {"message": "<text>"}. Responds
// immediately with {status: "processing", thread_id} and runs the agent
// loop in the background; poll GET /thread/:id with the returned id to see
// progress. When the agent ends on a message for the human, a HumanLayer
// contact is created that carries the thread id in its state.
app.post('/thread', async (req: Request, res: Response) => {
    const message = req.body?.message;
    if (typeof message !== 'string' || message.length === 0) {
        res.status(400).json({ error: "Request body must include a non-empty 'message' string" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    // Register the thread before responding so the caller gets an id it
    // can use straight away.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // Awaited so a failed request is reported by the catch below
            // rather than surfacing as an unhandled rejection.
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // The HTTP response has already been sent; log so the failure is
        // visible instead of crashing the process.
        console.error(`error processing thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    res.status(404).json({ error: "Thread not found" });
});

// Shape the /webhook route casts its request body to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// Placeholder: the body is empty and nothing in the visible part of this
// file calls it.
// NOTE(review): presumably intended to hold the webhook's response
// processing - confirm before wiring it up.
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - Receive a completed human contact
//
// Looks up the thread named in the contact's state, records the human's
// reply on it, acknowledges the webhook, and then resumes the agent loop in
// the background. If the agent ends on another message for the human, a new
// contact is created for the same thread.
//
// Responses: 400 when the thread id or reply text is missing, or when the
// thread is not waiting on a human; 404 when the thread is unknown;
// otherwise 200 {status: "ok"}.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // Optional chaining keeps a malformed payload from throwing before the
    // validation below can answer with a 400.
    const humanResponse = response?.event?.status?.response;
    const threadId = response?.event?.spec?.state?.thread_id;

    if (!threadId) {
        res.status(400).json({ error: "Thread ID not found" });
        return;
    }

    const thread = store.get(threadId);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    if (!thread.awaitingHumanResponse()) {
        res.status(400).json({ error: "Thread is not awaiting human response" });
        return;
    }

    if (typeof humanResponse !== 'string') {
        res.status(400).json({ error: "Human response not found" });
        return;
    }

    // Record the reply, then acknowledge right away so the sender is not
    // kept waiting on the model.
    thread.events.push({ type: "human_response", data: humanResponse });
    store.update(threadId, thread);
    res.json({ status: "ok" });

    // Resume processing in the background.
    agentLoop(thread).then(async (newThread) => {
        store.update(threadId, newThread);

        if (newThread.awaitingHumanResponse()) {
            const lastEvent = newThread.events[newThread.events.length - 1];
            await getHumanlayer().createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // The HTTP response has already been sent; log so the failure is visible.
        console.error(`error resuming thread ${threadId}`, err);
    });
});

// Listen on PORT from the environment, falling back to 3000.
// Note that this runs whenever the module is loaded, including when `app`
// is imported elsewhere.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/sections/final/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';


// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
export class ThreadStore {
    // In-memory map from generated thread id to its Thread.
    private threads = new Map<string, Thread>();

    /** Stores `thread` under a freshly generated UUID and returns that id. */
    create(thread: Thread): string {
        const threadId = crypto.randomUUID();
        this.threads.set(threadId, thread);
        return threadId;
    }

    /** Returns the thread stored under `id`, or undefined when there is none. */
    get(id: string): Thread | undefined {
        const stored = this.threads.get(id);
        return stored;
    }

    /** Stores `thread` under `id`, replacing whatever was there before. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}

================================================
FILE: workshops/2025-05/sections/final/tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/walkthrough/00-.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05/walkthrough/00-index.ts
================================================
// Prints the greeting to stdout.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Script entry point: awaits hello() before resolving.
async function main() {
    await hello()
}

main().catch(console.error)

================================================
FILE: workshops/2025-05/walkthrough/00-package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
  

================================================
FILE: workshops/2025-05/walkthrough/00-tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05/walkthrough/01-agent.baml
================================================
// Output shape for a final reply: a fixed "done_for_now" intent plus the
// message text.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Asks the "openai/gpt-4o" client what to do next for the given thread.
// `thread` is passed as a string and embedded verbatim in the user turn of
// the prompt; the declared result type is DoneForNow.
function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05/walkthrough/01-agent.ts
================================================
import { b } from "../baml_client";

// either a tool call or a respond-to-human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// A single entry in a thread's history.
export interface Event {
    // label for the kind of entry
    type: string
    // payload for the entry; its shape is not constrained here
    data: any;
}

// Holds the list of events for one conversation and knows how to turn it
// into the string handed to the LLM.
export class Thread {
    // Always supplied by the constructor.
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    // Returns the events as a compact JSON string.
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
// Runs a single LLM turn: serializes the thread, asks for the next step, and
// resolves with whatever the model returned.
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const serializedThread = thread.serializeForLLM();
    return await b.DetermineNextStep(serializedThread);
}


================================================
FILE: workshops/2025-05/walkthrough/01-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

// Command-line entry point: turns the process arguments into a one-event
// thread, runs the agent loop once, and prints the result.
export async function cli() {
    // Everything after the node binary and script path is the user's message.
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed a new thread with the joined arguments as a single user_input event.
    const userMessage = words.join(" ");
    const thread = new Thread([{ type: "user_input", data: userMessage }]);

    // One pass through the agent loop; print whatever comes back.
    console.log(await agentLoop(thread));
}


================================================
FILE: workshops/2025-05/walkthrough/01-index.ts
================================================
import { cli } from "./cli"

// Prints the greeting to stdout. main() in this file calls cli() instead,
// so this helper is not invoked here.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Script entry point: awaits cli() before resolving.
async function main() {
    await cli()
}

main().catch(console.error)

================================================
FILE: workshops/2025-05/walkthrough/02-agent.baml
================================================
class DoneForNow {
  intent "done_for_now"
  message string 
}

function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05/walkthrough/02-tool_calculator.baml
================================================
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call with intent "add"; each operand may be an int or a float.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call with intent "subtract"; each operand may be an int or a float.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call with intent "multiply"; each operand may be an int or a float.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call with intent "divide"; each operand may be an int or a float.
// Nothing in this schema prevents b from being 0.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05/walkthrough/03-agent.ts
================================================
import { b } from "../baml_client";

// either a tool call or a respond-to-human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// A single entry in a thread's history.
export interface Event {
    // label for the kind of entry, e.g. "tool_call" or "tool_response"
    type: string
    // payload for the entry; its shape is not constrained here
    data: any;
}

// Holds the list of events for one conversation and knows how to turn it
// into the string handed to the LLM.
export class Thread {
    // Always supplied by the constructor.
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    // Returns the events as a compact JSON string.
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}


// Keeps asking the model for the next step until it answers "done_for_now",
// then resolves with that message. Only the "add" tool is executed here;
// any other intent raises an error.
export async function agentLoop(thread: Thread): Promise<string> {

    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        if (step.intent === "done_for_now") {
            // response to human: hand back the message text
            return step.message;
        }

        if (step.intent !== "add") {
            // nothing but addition is wired up in this version of the loop
            throw new Error(`Unknown intent: ${step.intent}`);
        }

        // record the call, run it, then record the result before looping again
        thread.events.push({ type: "tool_call", data: step });
        const sum = step.a + step.b;
        console.log("tool_response", sum);
        thread.events.push({ type: "tool_response", data: sum });
    }
}


================================================
FILE: workshops/2025-05/walkthrough/03b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// A single entry in a thread's history.
export interface Event {
    // label for the kind of entry, e.g. "tool_call" or "tool_response"
    type: string
    // payload for the entry; its shape is not constrained here
    data: any;
}

// Holds the list of events for one conversation and knows how to turn it
// into the string handed to the LLM.
export class Thread {
    // Always supplied by the constructor.
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    // Returns the events as a compact JSON string.
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

// Executes one calculator tool call and appends its numeric result to the
// thread as a "tool_response" event, then resolves with the same thread.
//
// Division uses the plain `/` operator, so a zero divisor yields Infinity or
// NaN rather than an error.
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Shared tail of every operation: log the value, record it, hand the thread back.
    const respondWith = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ type: "tool_response", data: value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return respondWith(nextStep.a + nextStep.b);
        case "subtract":
            return respondWith(nextStep.a - nextStep.b);
        case "multiply":
            return respondWith(nextStep.a * nextStep.b);
        case "divide":
            return respondWith(nextStep.a / nextStep.b);
    }
}

// Keeps asking the model for the next step, recording each decision as a
// "tool_call" event, until it answers "done_for_now"; resolves with that
// message. Calculator intents are delegated to handleNextStep.
export async function agentLoop(thread: Thread): Promise<string> {

    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        // every model decision is recorded before it is acted on
        thread.events.push({ type: "tool_call", data: step });

        if (step.intent === "done_for_now") {
            // response to human: hand back the message text
            return step.message;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/walkthrough/04-agent.baml
================================================
class DoneForNow {
  intent "done_for_now"
  message string 
}

function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
}


================================================
FILE: workshops/2025-05/walkthrough/04b-agent.baml
================================================
class DoneForNow {
  intent "done_for_now"
  message string 
}

function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(hello, {{this.intent == "done_for_now"}})
}

test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(math_operation, {{this.intent == "multiply"}})
}


================================================
FILE: workshops/2025-05/walkthrough/04c-agent.baml
================================================
class DoneForNow {
  intent "done_for_now"
  message string 
}

function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


================================================
FILE: workshops/2025-05/walkthrough/05-agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

// Output shape for asking the human a question: a fixed
// "request_more_information" intent plus the message text to show them.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


================================================
FILE: workshops/2025-05/walkthrough/05-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// A single entry in a thread's history.
export interface Event {
    // label for the kind of entry, e.g. "tool_call" or "tool_response"
    type: string
    // payload for the entry; its shape is not constrained here
    data: any;
}

// Holds the list of events for one conversation and knows how to turn it
// into the string handed to the LLM.
export class Thread {
    // Always supplied by the constructor.
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    // Returns the events as a compact JSON string.
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

// Executes one calculator tool call and appends its numeric result to the
// thread as a "tool_response" event, then resolves with the same thread.
//
// Division uses the plain `/` operator, so a zero divisor yields Infinity or
// NaN rather than an error.
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Shared tail of every operation: log the value, record it, hand the thread back.
    const respondWith = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ type: "tool_response", data: value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return respondWith(nextStep.a + nextStep.b);
        case "subtract":
            return respondWith(nextStep.a - nextStep.b);
        case "multiply":
            return respondWith(nextStep.a * nextStep.b);
        case "divide":
            return respondWith(nextStep.a / nextStep.b);
    }
}

// Keeps asking the model for the next step, recording each decision as a
// "tool_call" event. Resolves with the thread as soon as the model wants to
// talk to the human ("done_for_now" or "request_more_information");
// calculator intents are delegated to handleNextStep and the loop continues.
export async function agentLoop(thread: Thread): Promise<Thread> {

    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        // every model decision is recorded before it is acted on
        thread.events.push({ type: "tool_call", data: step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/walkthrough/05-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";


// Command-line entry point: starts a thread from the process arguments, runs
// the agent loop, and keeps prompting the human on stdin for as long as the
// agent asks for more information. Prints the final message and exits with 0.
export async function cli() {
    // Everything after the node binary and script path is the user's message.
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed a new thread with the joined arguments as a single user_input event.
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    // The last event on the returned thread is the agent's latest decision.
    let latest = (await agentLoop(thread)).events.slice(-1)[0];

    while (latest.data.intent === "request_more_information") {
        // Relay the agent's question, record the answer, and run the loop again.
        const answer = await askHuman(latest.data.message);
        thread.events.push({ type: "human_response", data: answer });
        latest = (await agentLoop(thread)).events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(latest.data.message);
    process.exit(0);
}

// Prints `message` followed by a "> " prompt on stdout and resolves with the
// line the user types on stdin.
//
// A fresh readline interface is created per question and closed as soon as
// the answer arrives, so repeated questions do not leave several interfaces
// attached to stdin at the same time.
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // release stdin before handing the answer back
            rl.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05/walkthrough/05b-agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Replays a thread in which the user has already answered a clarification
// request, and checks that the next step is the multiply call with the
// clarified operands. Assert labels name the field they check, and the
// fixture array has no trailing comma.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/walkthrough/05c-agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Replays a thread in which the user has already answered a clarification
// request, and checks that the next step is the multiply call with the
// clarified operands. Assert labels name the field they check, and the
// fixture array has no trailing comma.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/walkthrough/06-agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Asks the "openai/gpt-4o" client what to do next for the given thread.
// `thread` is passed as a string and embedded verbatim in the user turn; the
// declared result type is HumanTools | CalculatorTools.
// After the output-format section the prompt also tells the model to write
// out a bulleted plan before the structured answer.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        First, always plan out what to do next, for example:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Replays a thread in which the user has already answered a clarification
// request, and checks that the next step is the multiply call with the
// clarified operands. Assert labels name the field they check, and the
// fixture array has no trailing comma.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/walkthrough/07-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a thread's history.
export interface Event {
    // kind of entry; code in this file pushes "tool_call" and "tool_response"
    type: string
    // payload; its shape varies with `type` (tool-call object, numeric result, ...)
    data: any;
}

/**
 * Ordered history of events for one conversation with the agent.
 */
export class Thread {
    events: Event[];

    /**
     * @param events - initial event list; stored by reference, not copied
     */
    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Render the whole event list as the text handed to the model.
     *
     * @returns pretty-printed JSON of `events`, indented by two spaces
     */
    serializeForLLM(): string {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const indentWidth = 2;
        return JSON.stringify(this.events, undefined, indentWidth);
    }
}

// Union of the calculator tool calls that handleNextStep knows how to execute.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its result on the thread.
 *
 * Logs the result and appends a `tool_response` event whose `data` is the
 * numeric result.
 *
 * @param nextStep - tool call to run; `intent` selects the operation applied to `a` and `b`
 * @param thread - thread to append to (mutated in place)
 * @returns the same thread instance that was passed in
 * @throws Error when `intent` is not add, subtract, multiply or divide
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Captured up front: inside `default` TypeScript narrows nextStep to `never`.
    const intent: string = nextStep.intent;
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // no zero guard: JS division yields Infinity/NaN instead of throwing
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Fail loudly: falling out of the switch used to resolve `undefined`,
            // which a caller would then treat as the thread.
            throw new Error(`unknown calculator tool: ${intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Run the agent until it produces a step addressed to the human.
 *
 * Each iteration asks the model for the next step, records it on the thread
 * as a `tool_call` event, and executes it if it is a calculator operation.
 *
 * @param thread - conversation so far; events are appended to it
 * @returns the thread once the model chooses `done_for_now` or `request_more_information`
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        // record the model's decision before acting on it
        thread.events.push({ "type": "tool_call", "data": step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/walkthrough/07b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a thread's history.
export interface Event {
    // kind of entry; used as the tag name when the payload carries no `intent`
    type: string
    // payload; either a scalar or an object of key/value fields
    data: any;
}

/**
 * Ordered history of events for one conversation, with an XML-like
 * serialization used as the model's context.
 */
export class Thread {
    events: Event[] = [];

    /**
     * @param events - initial event list; stored by reference, not copied
     */
    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Serialize every event and join the pieces with a newline.
     */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /**
     * Strip spaces and tabs from the start of every line of `s`.
     */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Render one event as `<tag>` / body / `</tag>`.
     *
     * The tag is the payload's `intent` when present, otherwise the event type.
     * Object payloads become one `key: value` line per field (excluding
     * `intent`); anything else is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null` is 'object'; treat null as a scalar so Object.keys never sees it
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }
}

// Union of the calculator tool calls that handleNextStep knows how to execute.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its result on the thread.
 *
 * Logs the result and appends a `tool_response` event whose `data` is the
 * numeric result.
 *
 * @param nextStep - tool call to run; `intent` selects the operation applied to `a` and `b`
 * @param thread - thread to append to (mutated in place)
 * @returns the same thread instance that was passed in
 * @throws Error when `intent` is not add, subtract, multiply or divide
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Captured up front: inside `default` TypeScript narrows nextStep to `never`.
    const intent: string = nextStep.intent;
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // no zero guard: JS division yields Infinity/NaN instead of throwing
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Fail loudly: falling out of the switch used to resolve `undefined`,
            // which a caller would then treat as the thread.
            throw new Error(`unknown calculator tool: ${intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Run the agent until it produces a step addressed to the human.
 *
 * Each iteration asks the model for the next step, records it on the thread
 * as a `tool_call` event, and executes it if it is a calculator operation.
 *
 * @param thread - conversation so far; events are appended to it
 * @returns the thread once the model chooses `done_for_now` or `request_more_information`
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        // record the model's decision before acting on it
        thread.events.push({ "type": "tool_call", "data": step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/walkthrough/07c-agent.baml
================================================
// human tools are async requests to a human:
// either a request for clarification or a final "done for now" message
type HumanTools = ClarificationRequest | DoneForNow

// Tool call the model emits when it needs more input from the human;
// `message` carries the question to ask.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Tool call the model emits when it has nothing further to do for now;
// `message` is the text reported back to the user.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Ask the model for the next step given the serialized thread.
// Input: `thread`, the already-serialized event history (plain text).
// Output: one human-directed tool (HumanTools) or one calculator tool.
// The prompt ends by asking the model to list its reasoning as bullet
// points before emitting the structured answer.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting contains no task, so the expected step is a
// clarification request.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single, well-formed multiplication request should map directly to
// the multiply tool.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Thread in which all three requested operations have already been run
// (3*4=12, 12/2=6, 6+12=18); the model should finish with done_for_now
// and mention the final answer 18 in its message.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// A garbled second operand should make the agent ask the human for
// clarification rather than pick a calculator tool.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// Once the human has clarified the garbled operand ("lets try 12 instead"),
// the next step should be multiply with a=3 and b=12.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // each assertion label names the field it actually checks
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05/walkthrough/08-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';

// Express application; JSON request bodies are parsed into req.body.
const app = express();
app.use(express.json());
// pretty-print JSON responses with two-space indentation
app.set('json spaces', 2);

// POST /thread - Start new thread
// Body: { message } - becomes the thread's initial user_input event.
// Responds with the thread returned by the agent loop, or 500 if the loop fails.
app.post('/thread', async (req, res) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);
        const result = await agentLoop(thread);
        res.json(result);
    } catch (err) {
        // Express 4 does not forward rejections from async handlers; without
        // this the request would hang with an unhandled rejection.
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// GET /thread/:id - Get thread status
// Placeholder: no thread storage exists in this file, so every request is
// answered with a 404 and a "Not implemented yet" error body.
app.get('/thread/:id', (req, res) => {
    // optional - add state
    res.status(404).json({ error: "Not implemented yet" });
});

// Listen on PORT from the environment, falling back to 3000.
// Note: this runs at import time, so importing the module starts the server.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// exported so other modules can import the configured app
export { app };

================================================
FILE: workshops/2025-05/walkthrough/09-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';

// Express application; JSON request bodies are parsed into req.body.
const app = express();
app.use(express.json());
// pretty-print JSON responses with two-space indentation
app.set('json spaces', 2);

// thread storage shared by the route handlers below
const store = new ThreadStore();

// POST /thread - Start new thread
// Body: { message } - becomes the thread's initial user_input event.
// Responds with { thread_id, events }, or 500 if the agent loop fails.
app.post('/thread', async (req, res) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);

        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // Express 4 does not forward rejections from async handlers; without
        // this the request would hang with an unhandled rejection.
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const { id } = req.params;
    const stored = store.get(id);
    if (stored) {
        res.json(stored);
        return;
    }
    res.status(404).json({ error: "Thread not found" });
});

// POST /thread/:id/response - Handle clarification response
// Body: { message } - appended to the thread as a human_response event, after
// which the agent loop resumes. Responds with the updated thread, 404 for an
// unknown id, or 500 if the agent loop fails.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    try {
        thread.events.push({
            type: "human_response",
            data: req.body.message
        });

        // loop until stop event
        const newThread = await agentLoop(thread);

        store.update(req.params.id, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(newThread);
    } catch (err) {
        // Express 4 does not forward rejections from async handlers; without
        // this the request would hang with an unhandled rejection.
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// Listen on PORT from the environment, falling back to 3000.
// Note: this runs at import time, so importing the module starts the server.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// exported so other modules can import the configured app
export { app };

================================================
FILE: workshops/2025-05/walkthrough/09-state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';


// In-memory thread storage keyed by a generated UUID.
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
export class ThreadStore {
    private threads = new Map<string, Thread>();

    // Store `thread` under a freshly generated id and hand that id back.
    create(thread: Thread): string {
        const threadId = crypto.randomUUID();
        this.threads.set(threadId, thread);
        return threadId;
    }

    // Look up a thread; yields undefined when the id is unknown.
    get(id: string): Thread | undefined {
        const found = this.threads.get(id);
        return found;
    }

    // Replace (or insert) the thread stored under `id`.
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}

================================================
FILE: workshops/2025-05/walkthrough/10-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a thread's history.
export interface Event {
    // kind of entry; used as the tag name when the payload carries no `intent`
    type: string
    // payload; either a scalar or an object of key/value fields
    data: any;
}

/**
 * Ordered history of events for one conversation, with an XML-like
 * serialization used as the model's context and helpers that report what
 * kind of human input the thread is waiting on.
 */
export class Thread {
    events: Event[] = [];

    /**
     * @param events - initial event list; stored by reference, not copied
     */
    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Serialize every event and join the pieces with a newline.
     */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /**
     * Strip spaces and tabs from the start of every line of `s`.
     */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Render one event as `<tag>` / body / `</tag>`.
     *
     * The tag is the payload's `intent` when present, otherwise the event type.
     * Object payloads become one `key: value` line per field (excluding
     * `intent`); anything else is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null` is 'object'; treat null as a scalar so Object.keys never sees it
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /**
     * True when the last event is a `request_more_information` or
     * `done_for_now` step. False for an empty thread or a last event whose
     * payload has no `intent`.
     */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        // optional chaining: the thread may be empty or the payload null/undefined
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /**
     * True when the last event is a `divide` step. False for an empty thread
     * or a last event whose payload has no `intent`.
     */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}

// Union of the calculator tool calls that handleNextStep knows how to execute.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its result on the thread.
 *
 * Logs the result and appends a `tool_response` event whose `data` is the
 * numeric result.
 *
 * @param nextStep - tool call to run; `intent` selects the operation applied to `a` and `b`
 * @param thread - thread to append to (mutated in place)
 * @returns the same thread instance that was passed in
 * @throws Error when `intent` is not add, subtract, multiply or divide
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Captured up front: inside `default` TypeScript narrows nextStep to `never`.
    const intent: string = nextStep.intent;
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // no zero guard: JS division yields Infinity/NaN instead of throwing
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Fail loudly: falling out of the switch used to resolve `undefined`,
            // which a caller would then treat as the thread.
            throw new Error(`unknown calculator tool: ${intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Run the agent until it produces a step that needs a human.
 *
 * Each iteration asks the model for the next step and records it on the
 * thread as a `tool_call` event. add/subtract/multiply are executed
 * immediately; `divide` is returned unexecuted so a human can approve it.
 *
 * @param thread - conversation so far; events are appended to it
 * @returns the thread once the last step is `done_for_now`,
 *          `request_more_information` or `divide`
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        // record the model's decision before acting on it
        thread.events.push({ "type": "tool_call", "data": step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (step.intent === "divide") {
            // divide is scary, return it for human approval
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-05/walkthrough/10-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';

// Express application; JSON request bodies are parsed into req.body.
const app = express();
app.use(express.json());
// pretty-print JSON responses with two-space indentation
app.set('json spaces', 2);

// thread storage shared by the route handlers below
const store = new ThreadStore();

// POST /thread - Start new thread
// Body: { message } - becomes the thread's initial user_input event.
// Responds with { thread_id, events }, or 500 if the agent loop fails.
app.post('/thread', async (req, res) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);

        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // Express 4 does not forward rejections from async handlers; without
        // this the request would hang with an unhandled rejection.
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const { id } = req.params;
    const stored = store.get(id);
    if (stored) {
        res.json(stored);
        return;
    }
    res.status(404).json({ error: "Thread not found" });
});


// Body sent when the human approves or rejects a pending tool call.
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    // optional feedback; the response handler includes it in the denial message
    comment?: string;
}

// Body sent when the human answers a question from the agent.
type ResponsePayload = {
    type: "response";
    response: string;
}

// Accepted bodies for POST /thread/:id/response, discriminated by `type`.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Body: Payload. Depending on what the thread is waiting for:
//   - "response" while awaiting a human response: appended as human_response
//   - "approval" (denied) while awaiting approval: denial recorded as tool_response
//   - "approval" (approved) while awaiting approval: the pending tool is executed
// Any other combination is rejected with 400. The agent loop then resumes and
// the updated thread is returned. 404 for an unknown id, 500 on failure.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    try {
        const body: Payload = req.body;

        let lastEvent = thread.events[thread.events.length - 1];

        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; `comment` is optional, so fall back
            // to an empty string rather than interpolating the text "undefined"
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment || ""}"`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const newThread = await agentLoop(thread);

        store.update(req.params.id, newThread);

        lastEvent = newThread.events[newThread.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(newThread);
    } catch (err) {
        // Express 4 does not forward rejections from async handlers; without
        // this the request would hang with an unhandled rejection.
        console.error("failed to process thread response", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// Listen on PORT from the environment, falling back to 3000.
// Note: this runs at import time, so importing the module starts the server.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// exported so other modules can import the configured app
export { app };

================================================
FILE: workshops/2025-05/walkthrough/11-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: turn the arguments into a user message, run the
 * agent, and keep relaying its requests to a human until it reports
 * `done_for_now`. Prints the final message and exits the process.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script; the rest is the message
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole command line becomes the thread's first event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events[newThread.events.length - 1];

    // every non-final step needs a human reply before the loop can resume
    for (
        ;
        lastEvent.data.intent !== "done_for_now";
        lastEvent = newThread.events[newThread.events.length - 1]
    ) {
        const reply = await askHuman(lastEvent);
        thread.events.push(reply);
        newThread = await agentLoop(thread);
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

// Route a pending agent step to a human: via email when HUMANLAYER_API_KEY is
// set, otherwise by prompting on the terminal with the step's message.
async function askHuman(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_API_KEY) {
        return await askHumanCLI(lastEvent.data.message);
    }
    return await askHumanEmail(lastEvent);
}

// Prompt on the terminal with `message` and resolve with the typed answer as
// a human_response event.
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Every call creates a fresh interface on process.stdin; close this
            // one so earlier interfaces do not stay attached to stdin.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Ask a human over email (via humanlayer) to approve a pending `divide` step.
 *
 * @param lastEvent - the tool_call event awaiting approval
 * @returns a tool_response event: the quotient when approved, otherwise a
 *          denial message containing the human's comment
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not `divide`
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    const address = process.env.HUMANLAYER_EMAIL;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    // reads apiKey from env; runId names this agent
    const hl = humanlayer({
        runId: "12fa-cli-agent",
        verbose: true,
        // agent should request permission via email
        contactChannel: { email: { address } }
    });

    const call = lastEvent.data;
    if (call.intent !== "divide") {
        throw new Error(`unknown tool: ${call.intent}`);
    }

    // fetch approval synchronously - this will block until reply
    const approval = await hl.fetchHumanApproval({
        spec: {
            fn: "divide",
            kwargs: { a: call.a, b: call.b }
        }
    });

    if (!approval.approved) {
        return {
            "type": "tool_response",
            "data": `user denied operation ${call.intent}
                with feedback: ${approval.comment}`
        };
    }

    const quotient = call.a / call.b;
    console.log("tool_response", quotient);
    return { "type": "tool_response", "data": quotient };
}

================================================
FILE: workshops/2025-05/walkthrough/11b-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: turn the arguments into a user message, run the
 * agent, and keep relaying its requests to a human until it reports
 * `done_for_now`. Prints the final message and exits the process.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script; the rest is the message
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole command line becomes the thread's first event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events[newThread.events.length - 1];

    // every non-final step needs a human reply before the loop can resume
    for (
        ;
        lastEvent.data.intent !== "done_for_now";
        lastEvent = newThread.events[newThread.events.length - 1]
    ) {
        const reply = await askHuman(lastEvent);
        thread.events.push(reply);
        newThread = await agentLoop(thread);
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

// Route a pending agent step to a human: via email when HUMANLAYER_API_KEY is
// set, otherwise by prompting on the terminal with the step's message.
async function askHuman(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_API_KEY) {
        return await askHumanCLI(lastEvent.data.message);
    }
    return await askHumanEmail(lastEvent);
}

// Prompt on the terminal with `message` and resolve with the typed answer as
// a human_response event.
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Every call creates a fresh interface on process.stdin; close this
            // one so earlier interfaces do not stay attached to stdin.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contact a human over email (via humanlayer) about a pending agent step.
 *
 * `request_more_information` steps are sent as a question and the reply is
 * returned; `divide` steps are sent for approval and executed when approved.
 *
 * @param lastEvent - the tool_call event that needs human input
 * @returns a tool_response event carrying the reply, the quotient, or a
 *          denial message containing the human's comment
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not handled
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    const address = process.env.HUMANLAYER_EMAIL;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    // reads apiKey from env; runId names this agent
    const hl = humanlayer({
        runId: "12fa-cli-agent",
        verbose: true,
        // agent should request permission via email
        contactChannel: { email: { address } }
    });

    const call = lastEvent.data;
    switch (call.intent) {
        case "request_more_information": {
            // fetch response synchronously - this will block until reply
            const reply = await hl.fetchHumanResponse({
                spec: { msg: call.message }
            });
            return { "type": "tool_response", "data": reply };
        }
        case "divide": {
            // fetch approval synchronously - this will block until reply
            const approval = await hl.fetchHumanApproval({
                spec: {
                    fn: "divide",
                    kwargs: { a: call.a, b: call.b }
                }
            });

            if (!approval.approved) {
                return {
                    "type": "tool_response",
                    "data": `user denied operation ${call.intent}
                with feedback: ${approval.comment}`
                };
            }

            const quotient = call.a / call.b;
            console.log("tool_response", quotient);
            return { "type": "tool_response", "data": quotient };
        }
        default:
            throw new Error(`unknown tool: ${call.intent}`);
    }
}

================================================
FILE: workshops/2025-05/walkthrough/11c-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: turn the arguments into a user message, run the
 * agent, and keep relaying its requests to a human until it reports
 * `done_for_now`. Prints the final message and exits the process.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script; the rest is the message
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the whole command line becomes the thread's first event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events[newThread.events.length - 1];

    // every non-final step needs a human reply before the loop can resume
    for (
        ;
        lastEvent.data.intent !== "done_for_now";
        lastEvent = newThread.events[newThread.events.length - 1]
    ) {
        const reply = await askHuman(lastEvent);
        thread.events.push(reply);
        newThread = await agentLoop(thread);
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

// Route a pending agent step to a human: via email when HUMANLAYER_API_KEY is
// set, otherwise by prompting on the terminal with the step's message.
async function askHuman(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_API_KEY) {
        return await askHumanCLI(lastEvent.data.message);
    }
    return await askHumanEmail(lastEvent);
}

// Prompt on the terminal with `message` and resolve with the typed answer as
// a human_response event.
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Every call creates a fresh interface on process.stdin; close this
            // one so earlier interfaces do not stay attached to stdin.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contact a human over email (via humanlayer) about a pending agent step,
 * using a custom email body template.
 *
 * `request_more_information` steps are sent as a question and the reply is
 * returned; `divide` steps are sent for approval and executed when approved.
 *
 * @param lastEvent - the tool_call event that needs human input
 * @returns a tool_response event carrying the reply, the quotient, or a
 *          denial message containing the human's comment
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not handled
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    const address = process.env.HUMANLAYER_EMAIL;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    // reads apiKey from env; runId names this agent
    const hl = humanlayer({
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
<br><br>
reply to this email to approve
{% endif %}`
            }
        }
    });

    const call = lastEvent.data;
    switch (call.intent) {
        case "request_more_information": {
            // fetch response synchronously - this will block until reply
            const reply = await hl.fetchHumanResponse({
                spec: { msg: call.message }
            });
            return { "type": "tool_response", "data": reply };
        }
        case "divide": {
            // fetch approval synchronously - this will block until reply
            const approval = await hl.fetchHumanApproval({
                spec: {
                    fn: "divide",
                    kwargs: { a: call.a, b: call.b }
                }
            });

            if (!approval.approved) {
                return {
                    "type": "tool_response",
                    "data": `user denied operation ${call.intent}
                with feedback: ${approval.comment}`
                };
            }

            const quotient = call.a / call.b;
            console.log("tool_response", quotient);
            return { "type": "tool_response", "data": quotient };
        }
        default:
            throw new Error(`unknown tool: ${call.intent}`);
    }
}

================================================
FILE: workshops/2025-05/walkthrough/12-1-server-init.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer } from 'humanlayer';

// Express application; JSON request bodies are parsed into req.body.
const app = express();
app.use(express.json());
// pretty-print JSON responses with two-space indentation
app.set('json spaces', 2);

// thread storage shared by the route handlers below
const store = new ThreadStore();

// Build a humanlayer client that contacts the human by email.
// Requires HUMANLAYER_EMAIL and HUMANLAYER_API_KEY in the environment and
// throws when either is missing. The API key is only checked here, not passed
// to humanlayer() explicitly.
const getHumanlayer = () => {
    const requireEnv = (name: "HUMANLAYER_EMAIL" | "HUMANLAYER_API_KEY"): string => {
        const value = process.env[name];
        if (!value) {
            throw new Error("missing or invalid parameters: " + name);
        }
        return value;
    };

    // checked in this order so the email error is reported first
    const address = requireEnv("HUMANLAYER_EMAIL");
    requireEnv("HUMANLAYER_API_KEY");

    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address }
        }
    });
}

// POST /thread - Start new thread
// Body: { message } - becomes the thread's initial user_input event.
// Responds with { thread_id, events }, or 500 if the agent loop fails.
app.post('/thread', async (req, res) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);

        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // Express 4 does not forward rejections from async handlers; without
        // this the request would hang with an unhandled rejection.
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const { id } = req.params;
    const found = store.get(id);

    if (found) {
        res.json(found);
        return;
    }

    res.status(404).json({ error: "Thread not found" });
});


// Body sent to approve or deny the tool call a thread is waiting on.
type ApprovalPayload = {
    type: "approval";
    // true runs the pending tool call, false denies it
    approved: boolean;
    // optional feedback; on denial it is embedded in the tool_response event
    comment?: string;
}

// Body sent to answer the agent when the thread awaits a human response.
type ResponsePayload = {
    type: "response";
    // text pushed onto the thread as a "human_response" event
    response: string;
}

// Request bodies accepted by POST /thread/:id/response, discriminated by `type`.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle a human's reply to a paused thread
//
// Accepts either a { type: "response", response } payload (an answer for the
// agent) or a { type: "approval", approved, comment? } payload (approve or deny
// the pending tool call). The outcome is pushed onto the thread, the agent loop
// is resumed, and the updated thread is returned.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    // req.body may be missing when the client sends no JSON body; reject it
    // up front instead of crashing on `body.type` below.
    const body: Payload | undefined = req.body;
    if (!body) {
        res.status(400).json({
            error: "Invalid request: missing JSON body",
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    let lastEvent = thread.events[thread.events.length - 1];

    try {
        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; `comment` is optional, so avoid
            // interpolating the literal string "undefined"
            const feedback = body.comment ?? "";
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${feedback}"`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            // payload type does not match what the thread is waiting for
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const result = await agentLoop(thread);

        store.update(req.params.id, result);

        lastEvent = result.events[result.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(result);
    } catch (err) {
        // Report tool or agent-loop failures explicitly.
        console.error("failed to resume thread", req.params.id, err);
        res.status(500).json({
            error: "Agent loop failed",
            thread_id: req.params.id
        });
    }
});

// Listen on PORT from the environment, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Starts the HTTP server as soon as this module is loaded.
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/walkthrough/12-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

// Express application that exposes the agent over HTTP.
const app = express();
// Parse JSON request bodies into req.body.
app.use(express.json());
// Pretty-print JSON responses with 2-space indentation.
app.set('json spaces', 2);

// Store used by the routes below to create, look up and update threads by id.
const store = new ThreadStore();


/**
 * Build a HumanLayer client that contacts the configured email address.
 *
 * Reads HUMANLAYER_EMAIL and HUMANLAYER_API_KEY from the environment and
 * throws if either one is unset or empty. The API key is only checked for
 * presence here; it is not passed to `humanlayer()` explicitly.
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: address, HUMANLAYER_API_KEY: apiKey } = process.env;

    // The email address is validated first, then the API key.
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address } };
    return humanlayer({ runId: `12fa-agent`, contactChannel });
}
// POST /thread - Start new thread
//
// Registers the thread, kicks off the agent loop in the background and
// responds immediately. The response carries the thread id so the client can
// poll GET /thread/:id for progress.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Create the thread before responding so its id can be returned.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // Creating the contact does not wait for the human's answer, but
            // the request itself can fail, so await it to surface errors below.
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // The HTTP response has already been sent; log the failure rather
        // than leaving an unhandled promise rejection.
        console.error("background agent run failed for thread", threadId, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const { id } = req.params;
    const found = store.get(id);

    if (found) {
        res.json(found);
        return;
    }

    res.status(404).json({ error: "Thread not found" });
});

// Shape the /webhook route casts its request body to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// NOTE(review): empty placeholder that no code in this file calls —
// presumably reserved for shared human-response handling; confirm or remove.
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - Receive a completed human contact
//
// Looks up the thread referenced by the contact's state, records the human's
// answer on it, resumes the agent loop and acknowledges the webhook.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is expected to be set on a webhook, but the body is untrusted
    // input, so read it defensively and validate it below
    const humanResponse = response?.event?.status?.response;

    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        res.status(400).json({ error: "Thread ID not found" });
        return;
    }

    const thread = store.get(threadId);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    if (!thread.awaitingHumanResponse()) {
        res.status(400).json({ error: "Thread is not awaiting human response" });
        return;
    }

    if (typeof humanResponse !== 'string') {
        res.status(400).json({ error: "Human response not found" });
        return;
    }

    // Record the answer so the agent sees it on its next turn.
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });

    try {
        // loop until the next stop event, then persist the result
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        // Always complete the request so the webhook sender does not hang.
        res.json({ status: "ok" });
    } catch (err) {
        console.error("failed to resume thread from webhook", threadId, err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});

// Listen on PORT from the environment, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Starts the HTTP server as soon as this module is loaded.
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/walkthrough/12a-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

// Express application that exposes the agent over HTTP.
const app = express();
// Parse JSON request bodies into req.body.
app.use(express.json());
// Pretty-print JSON responses with 2-space indentation.
app.set('json spaces', 2);

// Store used by the routes below to create, look up and update threads by id.
const store = new ThreadStore();

/**
 * Build a HumanLayer client that contacts the configured email address.
 *
 * Reads HUMANLAYER_EMAIL and HUMANLAYER_API_KEY from the environment and
 * throws if either one is unset or empty. The API key is only checked for
 * presence here; it is not passed to `humanlayer()` explicitly.
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: address, HUMANLAYER_API_KEY: apiKey } = process.env;

    // The email address is validated first, then the API key.
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address } };
    return humanlayer({ runId: `12fa-agent`, contactChannel });
}
// POST /thread - Start new thread
//
// Registers the thread, kicks off the agent loop in the background and
// responds immediately. The response carries the thread id so the client can
// poll GET /thread/:id for progress.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Create the thread before responding so its id can be returned.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // Creating the contact does not wait for the human's answer, but
            // the request itself can fail, so await it to surface errors below.
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // The HTTP response has already been sent; log the failure rather
        // than leaving an unhandled promise rejection.
        console.error("background agent run failed for thread", threadId, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const { id } = req.params;
    const found = store.get(id);

    if (found) {
        res.json(found);
        return;
    }

    res.status(404).json({ error: "Thread not found" });
});

// Shape the /webhook route casts its request body to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// NOTE(review): empty placeholder that no code in this file calls —
// presumably reserved for shared human-response handling; confirm or remove.
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - Receive a completed human contact
//
// Looks up the thread referenced by the contact's state, records the human's
// answer on it, resumes the agent loop and acknowledges the webhook.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is expected to be set on a webhook, but the body is untrusted
    // input, so read it defensively and validate it below
    const humanResponse = response?.event?.status?.response;

    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        res.status(400).json({ error: "Thread ID not found" });
        return;
    }

    const thread = store.get(threadId);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    if (!thread.awaitingHumanResponse()) {
        res.status(400).json({ error: "Thread is not awaiting human response" });
        return;
    }

    if (typeof humanResponse !== 'string') {
        res.status(400).json({ error: "Human response not found" });
        return;
    }

    // Record the answer so the agent sees it on its next turn.
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });

    try {
        // loop until the next stop event, then persist the result
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        // Always complete the request so the webhook sender does not hang.
        res.json({ status: "ok" });
    } catch (err) {
        console.error("failed to resume thread from webhook", threadId, err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});

// Listen on PORT from the environment, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Starts the HTTP server as soon as this module is loaded.
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/walkthrough/12aa-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

// Express application that exposes the agent over HTTP.
const app = express();
// Parse JSON request bodies into req.body.
app.use(express.json());
// Pretty-print JSON responses with 2-space indentation.
app.set('json spaces', 2);

// Store used by the routes below to create, look up and update threads by id.
const store = new ThreadStore();


/**
 * Build a HumanLayer client that contacts the configured email address.
 *
 * Reads HUMANLAYER_EMAIL and HUMANLAYER_API_KEY from the environment and
 * throws if either one is unset or empty. The API key is only checked for
 * presence here; it is not passed to `humanlayer()` explicitly.
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: address, HUMANLAYER_API_KEY: apiKey } = process.env;

    // The email address is validated first, then the API key.
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address } };
    return humanlayer({ runId: `12fa-agent`, contactChannel });
}
// POST /thread - Start new thread
//
// Registers the thread, kicks off the agent loop in the background and
// responds immediately. The response carries the thread id so the client can
// poll GET /thread/:id for progress.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Create the thread before responding so its id can be returned.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // Creating the contact does not wait for the human's answer, but
            // the request itself can fail, so await it to surface errors below.
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // The HTTP response has already been sent; log the failure rather
        // than leaving an unhandled promise rejection.
        console.error("background agent run failed for thread", threadId, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const { id } = req.params;
    const found = store.get(id);

    if (found) {
        res.json(found);
        return;
    }

    res.status(404).json({ error: "Thread not found" });
});

// Shape the /webhook route casts its request body to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// NOTE(review): empty placeholder that no code in this file calls —
// presumably reserved for shared human-response handling; confirm or remove.
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - Receive a completed human contact
//
// Looks up the thread referenced by the contact's state, records the human's
// answer on it, resumes the agent loop and acknowledges the webhook.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is expected to be set on a webhook, but the body is untrusted
    // input, so read it defensively and validate it below
    const humanResponse = response?.event?.status?.response;

    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        res.status(400).json({ error: "Thread ID not found" });
        return;
    }

    const thread = store.get(threadId);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    if (!thread.awaitingHumanResponse()) {
        res.status(400).json({ error: "Thread is not awaiting human response" });
        return;
    }

    if (typeof humanResponse !== 'string') {
        res.status(400).json({ error: "Human response not found" });
        return;
    }

    // Record the answer so the agent sees it on its next turn.
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });

    try {
        // loop until the next stop event, then persist the result
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        // Always complete the request so the webhook sender does not hang.
        res.json({ status: "ok" });
    } catch (err) {
        console.error("failed to resume thread from webhook", threadId, err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});

// Listen on PORT from the environment, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Starts the HTTP server as soon as this module is loaded.
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/walkthrough/12b-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { V1Beta2EmailEventReceived, V1Beta2FunctionCallCompleted, V1Beta2HumanContactCompleted } from 'humanlayer';

// Express application that exposes the agent over HTTP.
const app = express();
// Parse JSON request bodies into req.body.
app.use(express.json());
// Pretty-print JSON responses with 2-space indentation.
app.set('json spaces', 2);

// Store used by the routes below to create, look up and update threads by id.
const store = new ThreadStore();

// POST /thread - Start new thread
//
// Expects a JSON body of the form { "message": ... }. Runs the agent loop
// until it stops and responds with the thread id plus the resulting thread.
app.post('/thread', async (req, res) => {
    // Reject requests without a message instead of handing the agent an empty
    // "user_input" event. req.body may be undefined when no JSON body was sent.
    const message = req.body?.message;
    if (message === undefined || message === null || message === "") {
        res.status(400).json({ error: "Invalid request: message is required" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    const threadId = store.create(thread);

    try {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // Report the failure explicitly; the thread stays in the store so the
        // client can still inspect it via GET /thread/:id.
        console.error("agent loop failed for thread", threadId, err);
        res.status(500).json({
            error: "Agent loop failed",
            thread_id: threadId
        });
    }
});

// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const { id } = req.params;
    const found = store.get(id);

    if (found) {
        res.json(found);
        return;
    }

    res.status(404).json({ error: "Thread not found" });
});


// Body sent to approve or deny the tool call a thread is waiting on.
type ApprovalPayload = {
    type: "approval";
    // true runs the pending tool call, false denies it
    approved: boolean;
    // optional feedback; on denial it is embedded in the tool_response event
    comment?: string;
}

// Body sent to answer the agent when the thread awaits a human response.
type ResponsePayload = {
    type: "response";
    // text pushed onto the thread as a "human_response" event
    response: string;
}

// Request bodies accepted by POST /thread/:id/response, discriminated by `type`.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle a human's reply to a paused thread
//
// Accepts either a { type: "response", response } payload (an answer for the
// agent) or a { type: "approval", approved, comment? } payload (approve or deny
// the pending tool call). The outcome is pushed onto the thread, the agent loop
// is resumed, and the updated thread is returned.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    // req.body may be missing when the client sends no JSON body; reject it
    // up front instead of crashing on `body.type` below.
    const body: Payload | undefined = req.body;
    if (!body) {
        res.status(400).json({
            error: "Invalid request: missing JSON body",
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    let lastEvent = thread.events[thread.events.length - 1];

    try {
        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; `comment` is optional, so avoid
            // interpolating the literal string "undefined"
            const feedback = body.comment ?? "";
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${feedback}"`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            // payload type does not match what the thread is waiting for
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const result = await agentLoop(thread);

        store.update(req.params.id, result);

        lastEvent = result.events[result.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(result);
    } catch (err) {
        // Report tool or agent-loop failures explicitly.
        console.error("failed to resume thread", req.params.id, err);
        res.status(500).json({
            error: "Agent loop failed",
            thread_id: req.params.id
        });
    }
});

// Shape the /webhook/response route casts its request body to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// POST /webhook/response - Receive a completed human contact
//
// Looks up the thread referenced by the contact's state, records the human's
// answer on it, resumes the agent loop and acknowledges the webhook.
app.post('/webhook/response', async (req, res) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is expected to be set on a webhook, but the body is untrusted
    // input, so read it defensively and validate it below
    const humanResponse = response?.event?.status?.response;

    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        res.status(400).json({ error: "Thread ID not found" });
        return;
    }

    const thread = store.get(threadId);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    if (!thread.awaitingHumanResponse()) {
        res.status(400).json({ error: "Thread is not awaiting human response" });
        return;
    }

    if (typeof humanResponse !== 'string') {
        res.status(400).json({ error: "Human response not found" });
        return;
    }

    // Record the answer so the agent sees it on its next turn.
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });

    try {
        // loop until the next stop event, then persist the result
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        // Always complete the request so the webhook sender does not hang.
        res.json({ status: "ok" });
    } catch (err) {
        console.error("failed to resume thread from webhook", threadId, err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});

// Listen on PORT from the environment, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Starts the HTTP server as soon as this module is loaded.
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/walkthrough.md
================================================
# Building the 12-factor agent template from scratch

Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology.

## Cleanup

Make sure you're starting from a clean slate

Clean up existing files

    rm -rf baml_src/ && rm -rf src/

## Chapter 0 - Hello World

Let's start with a basic TypeScript setup and a hello world program.

This guide is written in TypeScript (yes, a python version is coming soon)

There are many checkpoints between file edits in the workshop steps,
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.

To run this guide, you'll need a relatively recent version of nodejs and npm installed

You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine


    brew install node@20

You should see the node version

    node --version

Copy initial package.json

    cp ./walkthrough/00-package.json package.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-package.json
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
```

</details>

Install dependencies

    npm install

Copy tsconfig.json

    cp ./walkthrough/00-tsconfig.json tsconfig.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-tsconfig.json
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
```

</details>

add .gitignore

    cp ./walkthrough/00-.gitignore .gitignore

<details>
<summary>show file</summary>

```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```

</details>

Create src folder

    mkdir -p src

Add a simple hello world index.ts

    cp ./walkthrough/00-index.ts src/index.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/00-index.ts
async function hello(): Promise<void> {
    console.log('hello, world!')
}

async function main() {
    await hello()
}

main().catch(console.error)
```

</details>

Run it to verify

    npx tsx src/index.ts

You should see:

    hello, world!

## Chapter 1 - CLI and Agent Loop

Now let's add BAML and create our first agent with a CLI interface.

First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.


    npm install @boundaryml/baml

Initialize BAML

    npx baml-cli init

Remove default resume.baml

    rm baml_src/resume.baml

Add our starter agent, a single baml prompt that we'll build on

    cp ./walkthrough/01-agent.baml baml_src/agent.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/01-agent.baml
class DoneForNow {
  intent "done_for_now"
  message string 
}

function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
```

</details>

Generate BAML client code

    npx baml-cli generate

Enable BAML logging for this section

    export BAML_LOG=debug

Add the CLI interface

    cp ./walkthrough/01-cli.ts src/cli.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}
```

</details>

Update index.ts to use the CLI

```diff
src/index.ts
+import { cli } from "./cli"
+
 async function hello(): Promise<void> {
     console.log('hello, world!')
 
 async function main() {
-    await hello()
+    await cli()
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/01-index.ts src/index.ts

</details>

Add the agent implementation

    cp ./walkthrough/01-agent.ts src/agent.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
    type: string
    data: any;
}

export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        return JSON.stringify(this.events);
    }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    return nextStep;
}
```

</details>

The BAML code is configured to use OPENAI_API_KEY by default

As you're testing, you can change the model / provider to something else
as you please

        client "openai/gpt-4o"

[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)

For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) 
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.

If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key.


    export OPENAI_API_KEY=...

Try it out

    npx tsx src/index.ts hello

you should see a familiar response from the model

    {
      intent: 'done_for_now',
      message: 'Hello! How can I assist you today?'
    }

## Chapter 2 - Add Calculator Tools

Let's add some calculator tools to our agent.

Let's start by adding a tool definition for the calculator

These are simple structured outputs that we'll ask the model to 
return as a "next step" in the agentic loop.


    cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/02-tool_calculator.baml
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


class AddTool {
    intent "add"
    a int | float
    b int | float
}

class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

class DivideTool {
    intent "divide"
    a int | float
    b int | float
}
```

</details>

Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps


```diff
baml_src/agent.baml
 function DetermineNextStep(
     thread: string 
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
     client "openai/gpt-4o"
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/02-agent.baml baml_src/agent.baml

</details>

Generate updated BAML client

    npx baml-cli generate

Try out the calculator

    npx tsx src/index.ts 'can you add 3 and 4'

You should see a tool call to the calculator

    {
      intent: 'add',
      a: 3,
      b: 4
    }

## Chapter 3 - Process Tool Calls in a Loop

Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.

First, let's update the agent to handle the tool call


```diff
src/agent.ts
 }
 
-// right now this just runs one turn with the LLM, but
-// we'll update this function to handle all the agent logic
-export async function agentLoop(thread: Thread): Promise<AgentResponse> {
-    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
-    return nextStep;
+
+
+export async function agentLoop(thread: Thread): Promise<string> {
+
+    while (true) {
+        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
+        console.log("nextStep", nextStep);
+
+        switch (nextStep.intent) {
+            case "done_for_now":
+                // response to human, return the next step object
+                return nextStep.message;
+            case "add":
+                thread.events.push({
+                    "type": "tool_call",
+                    "data": nextStep
+                });
+                const result = nextStep.a + nextStep.b;
+                console.log("tool_response", result);
+                thread.events.push({
+                    "type": "tool_response",
+                    "data": result
+                });
+                continue;
+            default:
+                throw new Error(`Unknown intent: ${nextStep.intent}`);
+        }
+    }
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03-agent.ts src/agent.ts

</details>

Now, let's try it out


    npx tsx src/index.ts 'can you add 3 and 4'

you should see the agent call the tool and then return the result

    {
      intent: 'done_for_now',
      message: 'The sum of 3 and 4 is 7.'
    }

For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output

    export BAML_LOG=off

Try a multi-step calculation

    npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'

you'll notice that tools like multiply and divide are not available

    npx tsx src/index.ts 'can you multiply 3 and 4'

next, let's add handlers for the rest of the calculator tools


```diff
src/agent.ts
-import { b } from "../baml_client";
+import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
 
-// tool call or a respond to human tool
-type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
-
 export interface Event {
     type: string
 }
 
+export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
 
+export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
+    let result: number;
+    switch (nextStep.intent) {
+        case "add":
+            result = nextStep.a + nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "subtract":
+            result = nextStep.a - nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "multiply":
+            result = nextStep.a * nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "divide":
+            result = nextStep.a / nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+    }
+}
 
 export async function agentLoop(thread: Thread): Promise<string> {
         console.log("nextStep", nextStep);
 
+        thread.events.push({
+            "type": "tool_call",
+            "data": nextStep
+        });
+
         switch (nextStep.intent) {
             case "done_for_now":
                 return nextStep.message;
             case "add":
-                thread.events.push({
-                    "type": "tool_call",
-                    "data": nextStep
-                });
-                const result = nextStep.a + nextStep.b;
-                console.log("tool_response", result);
-                thread.events.push({
-                    "type": "tool_response",
-                    "data": result
-                });
-                continue;
-            default:
-                throw new Error(`Unknown intent: ${nextStep.intent}`);
+            case "subtract":
+            case "multiply":
+            case "divide":
+                thread = await handleNextStep(nextStep, thread);
         }
     }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03b-agent.ts src/agent.ts

</details>

Test subtraction

    npx tsx src/index.ts 'can you subtract 3 from 4'

now, let's test the multiplication tool


    npx tsx src/index.ts 'can you multiply 3 and 4'

finally, let's test a more complex calculation with multiple operations


    npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

## Chapter 4 - Add Tests to agent.baml

Let's add some tests to our BAML agent.

to start, leave the baml logs enabled

    export BAML_LOG=debug

next, let's add some tests to the agent

We'll start with a simple test that checks the agent's ability to handle
a basic calculation.


```diff
baml_src/agent.baml
     "#
   }
+
+test MathOperation {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+      {
+        "type": "user_input",
+        "data": "can you multiply 3 and 4?"
+      }
+    "#
+  }
+}
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/04-agent.baml baml_src/agent.baml

</details>

Run the tests

    npx baml-cli test

now, let's improve the test with assertions!

Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.


```diff
baml_src/agent.baml
     "#
   }
+  @@assert(hello, {{this.intent == "done_for_now"}})
 }
 
     "#
   }
+  @@assert(math_operation, {{this.intent == "multiply"}})
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/04b-agent.baml baml_src/agent.baml

</details>

Run the tests

    npx baml-cli test

as you add more tests, you can disable the logs to keep the output clean. 
You may want to turn them on as you iterate on specific tests.


    export BAML_LOG=off

now, let's add some more complex test cases,
where we resume from the middle of an in-progress
agentic context window


```diff
baml_src/agent.baml
     "#
   }
-  @@assert(hello, {{this.intent == "done_for_now"}})
+  @@assert(intent, {{this.intent == "done_for_now"}})
 }
 
     "#
   }
-  @@assert(math_operation, {{this.intent == "multiply"}})
+  @@assert(intent, {{this.intent == "multiply"}})
 }
 
+test LongMath {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+      [
+        {
+          "type": "user_input",
+          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
+        },
+        {
+          "type": "tool_call",
+          "data": {
+            "intent": "multiply",
+            "a": 3,
+            "b": 4
+          }
+        },
+        {
+          "type": "tool_response",
+          "data": 12
+        },
+        {
+          "type": "tool_call", 
+          "data": {
+            "intent": "divide",
+            "a": 12,
+            "b": 2
+          }
+        },
+        {
+          "type": "tool_response",
+          "data": 6
+        },
+        {
+          "type": "tool_call",
+          "data": {
+            "intent": "add", 
+            "a": 6,
+            "b": 12
+          }
+        },
+        {
+          "type": "tool_response",
+          "data": 18
+        }
+      ]
+    "#
+  }
+  @@assert(intent, {{this.intent == "done_for_now"}})
+  @@assert(answer, {{"18" in this.message}})
+}
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/04c-agent.baml baml_src/agent.baml

</details>

let's try to run it


    npx baml-cli test

## Chapter 5 - Multiple Human Tools

In this section, we'll add support for multiple tools that serve to 
contact humans.


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

first, let's add a tool that can request clarification from a human 

this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.


```diff
baml_src/agent.baml
+// human tools are async requests to a human
+type HumanTools = ClarificationRequest | DoneForNow
+
+class ClarificationRequest {
+  intent "request_more_information" @description("you can request more information from me")
+  message string
+}
+
 class DoneForNow {
   intent "done_for_now"
-  message string 
+
+  message string @description(#"
+    message to send to the user about the work that was done. 
+  "#)
 }
 
 function DetermineNextStep(
     thread: string 
-) -> CalculatorTools | DoneForNow {
+) -> HumanTools | CalculatorTools {
     client "openai/gpt-4o"
 
 }
 
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05-agent.baml baml_src/agent.baml

</details>

next, let's re-generate the client code

NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.


    npx baml-cli generate

now, let's update the agent to use the new tool


```diff
src/agent.ts
 }
 
-export async function agentLoop(thread: Thread): Promise<string> {
+export async function agentLoop(thread: Thread): Promise<Thread> {
 
     while (true) {
         switch (nextStep.intent) {
             case "done_for_now":
-                // response to human, return the next step object
-                return nextStep.message;
+            case "request_more_information":
+                // response to human, return the thread
+                return thread;
             case "add":
             case "subtract":
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05-agent.ts src/agent.ts

</details>

next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI


```diff
src/cli.ts
 // cli.ts lets you invoke the agent loop from the command line
 
-import { agentLoop, Thread, Event } from "./agent";
+import { agentLoop, Thread, Event } from "../src/agent";
 
+
+
 export async function cli() {
     // Get command line arguments, skipping the first two (node and script name)
     // Run the agent loop with the thread
     const result = await agentLoop(thread);
-    console.log(result);
+    let lastEvent = result.events.slice(-1)[0];
+
+    while (lastEvent.data.intent === "request_more_information") {
+        const message = await askHuman(lastEvent.data.message);
+        thread.events.push({ type: "human_response", data: message });
+        const result = await agentLoop(thread);
+        lastEvent = result.events.slice(-1)[0];
+    }
+
+    // print the final result
+    // optional - you could loop here too
+    console.log(lastEvent.data.message);
+    process.exit(0);
 }
+
+async function askHuman(message: string) {
+    const readline = require('readline').createInterface({
+        input: process.stdin,
+        output: process.stdout
+    });
+
+    return new Promise((resolve) => {
+        readline.question(`${message}\n> `, (answer: string) => {
+            resolve(answer);
+        });
+    });
+}
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05-cli.ts src/cli.ts

</details>

let's try it out


    npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '

next, let's add a test that checks the agent's ability to handle
a clarification request


```diff
baml_src/agent.baml
 
 
+
+test MathOperationWithClarification {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+      "#
+  }
+  @@assert(intent, {{this.intent == "request_more_information"}})
+}
+
+test MathOperationPostClarification {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+        [
+        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
+        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
+        {"type":"human_response","data":"lets try 12 instead"},
+      ]
+      "#
+  }
+  @@assert(intent, {{this.intent == "multiply"}})
+  @@assert(a, {{this.b == 12}})
+  @@assert(b, {{this.a == 3}})
+}
+        
+
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05b-agent.baml baml_src/agent.baml

</details>

and now we can run the tests again


    npx baml-cli test

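you'll notice the new test passes, but the hello world test fails

This is because the hello world test still asserts the old default behavior of returning "done_for_now" - now that the clarification tool exists, the agent responds to "hello!" with "request_more_information" instead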


```diff
baml_src/agent.baml
     "#
   }
-  @@assert(intent, {{this.intent == "done_for_now"}})
+  @@assert(intent, {{this.intent == "request_more_information"}})
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05c-agent.baml baml_src/agent.baml

</details>

Verify tests pass

    npx baml-cli test

## Chapter 6 - Customize Your Prompt with Reasoning

In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.

this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)

there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)


for this section, it will be helpful to leave the baml logs enabled

    export BAML_LOG=debug

update the agent prompt to include a reasoning step


```diff
baml_src/agent.baml
 
         {{ ctx.output_format }}
+
+        First, always plan out what to do next, for example:
+
+        - ...
+        - ...
+        - ...
+
+        {...} // schema
     "#
 }
   @@assert(b, {{this.a == 3}})
 }
-        
-
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/06-agent.baml baml_src/agent.baml

</details>

generate the updated client

    npx baml-cli generate

now, you can try it out with a simple prompt


    npx tsx src/index.ts 'can you multiply 3 and 4'

you should see output from the baml logs showing the reasoning steps

#### optional challenge 

add a field to your tool output format that includes the reasoning steps in the output!


## Chapter 7 - Customize Your Context Window

In this section, we'll explore how to customize the context window
of the agent.

this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)


update the agent to pretty-print the Context window for the model


```diff
src/agent.ts
         // can change this to whatever custom serialization you want to do, XML, etc
         // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
-        return JSON.stringify(this.events);
+        return JSON.stringify(this.events, null, 2);
     }
 }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/07-agent.ts src/agent.ts

</details>

Test the formatting

    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

next, let's update the agent to use XML formatting instead 

this is a very popular format for passing data to a model -
among other things, because of the token efficiency of XML.


```diff
src/agent.ts
 
     serializeForLLM() {
-        // can change this to whatever custom serialization you want to do, XML, etc
-        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
-        return JSON.stringify(this.events, null, 2);
+        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
     }
+
+    trimLeadingWhitespace(s: string) {
+        return s.replace(/^[ \t]+/gm, '');
+    }
+
+    serializeOneEvent(e: Event) {
+        return this.trimLeadingWhitespace(`
+            <${e.data?.intent || e.type}>
+            ${
+            typeof e.data !== 'object' ? e.data :
+            Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")}
+            </${e.data?.intent || e.type}>
+        `)
+    }
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/07b-agent.ts src/agent.ts

</details>

let's try it out


    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

lets update our tests to match the new output format


```diff
baml_src/agent.baml
         {{ ctx.output_format }}
 
-        First, always plan out what to do next, for example:
+        Always think about what to do next first, like:
 
         - ...
   args {
     thread #"
-      {
-        "type": "user_input",
-        "data": "hello!"
-      }
+      <user_input>
+        hello!
+      </user_input>
     "#
   }
   args {
     thread #"
-      {
-        "type": "user_input",
-        "data": "can you multiply 3 and 4?"
-      }
+      <user_input>
+        can you multiply 3 and 4?
+      </user_input>
     "#
   }
   args {
     thread #"
-      [
-        {
-          "type": "user_input",
-          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
-        },
-        {
-          "type": "tool_call",
-          "data": {
-            "intent": "multiply",
-            "a": 3,
-            "b": 4
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 12
-        },
-        {
-          "type": "tool_call", 
-          "data": {
-            "intent": "divide",
-            "a": 12,
-            "b": 2
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 6
-        },
-        {
-          "type": "tool_call",
-          "data": {
-            "intent": "add", 
-            "a": 6,
-            "b": 12
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 18
-        }
-      ]
+         <user_input>
+    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
+    </user_input>
+
+
+    <multiply>
+    a: 3
+    b: 4
+    </multiply>
+
+
+    <tool_response>
+    12
+    </tool_response>
+
+
+    <divide>
+    a: 12
+    b: 2
+    </divide>
+
+
+    <tool_response>
+    6
+    </tool_response>
+
+
+    <add>
+    a: 6
+    b: 12
+    </add>
+
+
+    <tool_response>
+    18
+    </tool_response>
+
     "#
   }
   args {
     thread #"
-          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+          <user_input>
+          can you multiply 3 and fe1iiaff10
+          </user_input>
       "#
   }
   args {
     thread #"
-        [
-        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
-        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
-        {"type":"human_response","data":"lets try 12 instead"},
-      ]
+        <user_input>
+        can you multiply 3 and FD*(#F&& ?
+        </user_input>
+
+        <request_more_information>
+        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
+        </request_more_information>
+
+        <human_response>
+        lets try 12 instead
+        </human_response>
       "#
   }
   @@assert(intent, {{this.intent == "multiply"}})
 }
         
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/07c-agent.baml baml_src/agent.baml

</details>

check out the updated tests


    npx baml-cli test

## Chapter 8 - Adding API Endpoints

Add an Express server to expose the agent via HTTP.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install Express and types

    npm install express && npm install --save-dev @types/express supertest

Add the server implementation

    cp ./walkthrough/08-server.ts src/server.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/08-server.ts
import express from 'express';
import { Thread, agentLoop } from '../src/agent';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);
    const result = await agentLoop(thread);
    res.json(result);
});

// GET /thread/:id - Get thread status 
app.get('/thread/:id', (req, res) => {
    // optional - add state
    res.status(404).json({ error: "Not implemented yet" });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };
```

</details>

Start the server

    npx tsx src/server.ts

Test with curl (in another terminal)

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you add 3 and 4"}'

You should get an answer from the agent which includes the
agentic trace, ending in a message like: 


    {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}

## Chapter 9 - In-Memory State and Async Clarification

Add state management and async clarification support.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Add some simple in-memory state management for threads

    cp ./walkthrough/09-state.ts src/state.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/09-state.ts
import crypto from 'crypto';
import { Thread } from '../src/agent';


// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
export class ThreadStore {
    private threads: Map<string, Thread> = new Map();
    
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }
    
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }
    
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
```

</details>

update the server to use the state management

* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id 
* implement POST /thread/:id/response


```diff
src/server.ts
 import express from 'express';
 import { Thread, agentLoop } from '../src/agent';
+import { ThreadStore } from '../src/state';
 
 const app = express();
 app.set('json spaces', 2);
 
+const store = new ThreadStore();
+
 // POST /thread - Start new thread
 app.post('/thread', async (req, res) => {
         data: req.body.message
     }]);
-    const result = await agentLoop(thread);
-    res.json(result);
+    
+    const threadId = store.create(thread);
+    const newThread = await agentLoop(thread);
+    
+    store.update(threadId, newThread);
+
+    const lastEvent = newThread.events[newThread.events.length - 1];
+    // If we exited the loop, include the response URL so the client can
+    // push a new message onto the thread
+    lastEvent.data.response_url = `/thread/${threadId}/response`;
+
+    console.log("returning last event from endpoint", lastEvent);
+
+    res.json({ 
+        thread_id: threadId,
+        ...newThread 
+    });
 });
 
 app.get('/thread/:id', (req, res) => {
-    // optional - add state
-    res.status(404).json({ error: "Not implemented yet" });
+    const thread = store.get(req.params.id);
+    if (!thread) {
+        return res.status(404).json({ error: "Thread not found" });
+    }
+    res.json(thread);
 });
 
+// POST /thread/:id/response - Handle clarification response
+app.post('/thread/:id/response', async (req, res) => {
+    let thread = store.get(req.params.id);
+    if (!thread) {
+        return res.status(404).json({ error: "Thread not found" });
+    }
+    
+    thread.events.push({
+        type: "human_response",
+        data: req.body.message
+    });
+    
+    // loop until stop event
+    const newThread = await agentLoop(thread);
+    
+    store.update(req.params.id, newThread);
+
+    const lastEvent = newThread.events[newThread.events.length - 1];
+    lastEvent.data.response_url = `/thread/${req.params.id}/response`;
+
+    console.log("returning last event from endpoint", lastEvent);
+    
+    res.json(newThread);
+});
+
 const port = process.env.PORT || 3000;
 app.listen(port, () => {
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/09-server.ts src/server.ts

</details>

Start the server

    npx tsx src/server.ts

Test clarification flow

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you multiply 3 and xyz"}'

## Chapter 10 - Adding Human Approval

Add support for human approval of operations.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

update the server to handle human approvals

* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong


```diff
src/server.ts
 import express from 'express';
-import { Thread, agentLoop } from '../src/agent';
+import { Thread, agentLoop, handleNextStep } from '../src/agent';
 import { ThreadStore } from '../src/state';
 
 });
 
+
+type ApprovalPayload = {
+    type: "approval";
+    approved: boolean;
+    comment?: string;
+}
+
+type ResponsePayload = {
+    type: "response";
+    response: string;
+}
+
+type Payload = ApprovalPayload | ResponsePayload;
+
 // POST /thread/:id/response - Handle clarification response
 app.post('/thread/:id/response', async (req, res) => {
         return res.status(404).json({ error: "Thread not found" });
     }
+
+    const body: Payload = req.body;
+
+    let lastEvent = thread.events[thread.events.length - 1];
+
+    if (thread.awaitingHumanResponse() && body.type === 'response') {
+        thread.events.push({
+            type: "human_response",
+            data: body.response
+        });
+    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
+        // push feedback onto the thread
+        thread.events.push({
+            type: "tool_response",
+            data: `user denied the operation with feedback: "${body.comment}"`
+        });
+    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
+        // approved, run the tool, pushing results onto the thread
+        await handleNextStep(lastEvent.data, thread);
+    } else {
+        res.status(400).json({
+            error: "Invalid request: " + body.type,
+            awaitingHumanResponse: thread.awaitingHumanResponse(),
+            awaitingHumanApproval: thread.awaitingHumanApproval()
+        });
+        return;
+    }
+
     
-    thread.events.push({
-        type: "human_response",
-        data: req.body.message
-    });
-    
     // loop until stop event
     const newThread = await agentLoop(thread);
     store.update(req.params.id, newThread);
 
-    const lastEvent = newThread.events[newThread.events.length - 1];
+    lastEvent = newThread.events[newThread.events.length - 1];
     lastEvent.data.response_url = `/thread/${req.params.id}/response`;
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/10-server.ts src/server.ts

</details>

Add a few methods to the agent to handle approvals and responses

```diff
src/agent.ts
         `)
     }
+
+    awaitingHumanResponse(): boolean {
+        const lastEvent = this.events[this.events.length - 1];
+        return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent);
+    }
+
+    awaitingHumanApproval(): boolean {
+        const lastEvent = this.events[this.events.length - 1];
+        return lastEvent.data.intent === 'divide';
+    }
 }
 
                 // response to human, return the thread
                 return thread;
+            case "divide":
+                // divide is scary, return it for human approval
+                return thread;
             case "add":
             case "subtract":
             case "multiply":
-            case "divide":
                 thread = await handleNextStep(nextStep, thread);
         }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/10-agent.ts src/agent.ts

</details>

Start the server

    npx tsx src/server.ts

Test division with approval

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you divide 3 by 4"}'

You should see:

    {
      "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
      "events": [
        {
          "type": "user_input",
          "data": "can you divide 3 by 4"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 4,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        }
      ]
    }

reject the request with another curl call, replacing `{thread_id}` with the `thread_id` from the previous response

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
      -H "Content-Type: application/json" \
      -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'

You should see: the last tool call is now `"intent":"divide","a":3,"b":5`

    {
      "events": [
        {
          "type": "user_input",
          "data": "can you divide 3 by 4"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 4,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        },
        {
          "type": "tool_response",
          "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 5,
            "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
          }
        }
      ]
    }

now you can approve the operation

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
      -H "Content-Type: application/json" \
      -d '{"type": "approval", "approved": true}'

you should see the final message includes the tool response and final result!

    ...
    {
      "type": "tool_response",
      "data": 0.6
    },
    {
      "type": "done_for_now",
      "message": "I divided 3 by 5 and the result is 0.6. If you have any more operations or queries, feel free to ask!",
      "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
    }

## Chapter 11 - Human Approvals over email

in this section, we'll add support for human approvals over email.

This will start a little bit contrived, just to get the concepts down - 

We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI

While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install HumanLayer

    npm install humanlayer

Update CLI to send `divide` and `request_more_information` to a human via email

```diff
src/cli.ts
 // cli.ts lets you invoke the agent loop from the command line
 
+import { humanlayer } from "humanlayer";
 import { agentLoop, Thread, Event } from "../src/agent";
 
-
-
 export async function cli() {
     // Get command line arguments, skipping the first two (node and script name)
 
     // Run the agent loop with the thread
-    const result = await agentLoop(thread);
-    let lastEvent = result.events.slice(-1)[0];
+    let newThread = await agentLoop(thread);
+    let lastEvent = newThread.events.slice(-1)[0];
 
-    while (lastEvent.data.intent === "request_more_information") {
-        const message = await askHuman(lastEvent.data.message);
-        thread.events.push({ type: "human_response", data: message });
-        const result = await agentLoop(thread);
-        lastEvent = result.events.slice(-1)[0];
+    while (lastEvent.data.intent !== "done_for_now") {
+        const responseEvent = await askHuman(lastEvent);
+        thread.events.push(responseEvent);
+        newThread = await agentLoop(thread);
+        lastEvent = newThread.events.slice(-1)[0];
     }
 
     // print the final result
     console.log(lastEvent.data.message);
     process.exit(0);
 }
 
-async function askHuman(message: string) {
+async function askHuman(lastEvent: Event): Promise<Event> {
+    if (process.env.HUMANLAYER_API_KEY) {
+        return await askHumanEmail(lastEvent);
+    } else {
+        return await askHumanCLI(lastEvent.data.message);
+    }
+}
+
+async function askHumanCLI(message: string): Promise<Event> {
     const readline = require('readline').createInterface({
         input: process.stdin,
     return new Promise((resolve) => {
         readline.question(`${message}\n> `, (answer: string) => {
-            resolve(answer);
+            resolve({ type: "human_response", data: answer });
         });
     });
 }
+
+export async function askHumanEmail(lastEvent: Event): Promise<Event> {
+    if (!process.env.HUMANLAYER_EMAIL) {
+        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+    }
+    const hl = humanlayer({ //reads apiKey from env
+        // name of this agent
+        runId: "12fa-cli-agent",
+        verbose: true,
+        contactChannel: {
+            // agent should request permission via email
+            email: {
+                address: process.env.HUMANLAYER_EMAIL,
+            }
+        }
+    }) 
+
+    if (lastEvent.data.intent === "divide") {
+        // fetch approval synchronously - this will block until reply
+        const response = await hl.fetchHumanApproval({
+            spec: {
+                fn: "divide",
+                kwargs: {
+                    a: lastEvent.data.a,
+                    b: lastEvent.data.b
+                }
+            }
+        })
+
+        if (response.approved) {
+            const result = lastEvent.data.a / lastEvent.data.b;
+            console.log("tool_response", result);
+            return {
+                "type": "tool_response",
+                "data": result
+            };
+        } else {
+            return {
+                "type": "tool_response",
+                "data": `user denied operation ${lastEvent.data.intent}
+                with feedback: ${response.comment}`
+            };
+        }
+    }
+    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
+}
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/11-cli.ts src/cli.ts

</details>

Run the CLI

    npx tsx src/index.ts 'can you divide 4 by 5'

The last lines of your program's output should mention the human review step

    nextStep { intent: 'divide', a: 4, b: 5 }
    HumanLayer: Requested human approval from HumanLayer cloud

go ahead and respond to the email with some feedback:

![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true)


you should get another email with an updated attempt based on your feedback!

You can go ahead and approve this one:

![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true)


and your final output will look like

    nextStep {
     intent: 'done_for_now',
     message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
    }
    The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!

lets implement the `request_more_information` flow as well


```diff
src/cli.ts
     }) 
 
+    if (lastEvent.data.intent === "request_more_information") {
+        // fetch response synchronously - this will block until reply
+        const response = await hl.fetchHumanResponse({
+            spec: {
+                msg: lastEvent.data.message
+            }
+        })
+        return {
+            "type": "tool_response",
+            "data": response
+        }
+    }
+    
     if (lastEvent.data.intent === "divide") {
         // fetch approval synchronously - this will block until reply
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/11b-cli.ts src/cli.ts

</details>

let's test the `request_more_information` flow by asking for a calculation
with garbled input:


    npx tsx src/index.ts 'can you multiply 4 and xyz'

You should get an email with a request for clarification

    Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?

you can respond with something like

    use 8 instead of xyz

you should see a final result on the CLI like

    I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.

as a final step, lets explore using a custom html template for the email


```diff
src/cli.ts
             email: {
                 address: process.env.HUMANLAYER_EMAIL,
+                // custom email body - jinja
+                template: `{% if type == 'request_more_information' %}
+{{ event.spec.msg }}
+{% else %}
+agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
+with args: {{event.spec.kwargs}}
+<br><br>
+reply to this email to approve
+{% endif %}`
             }
         }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/11c-cli.ts src/cli.ts

</details>

first try with divide:


    npx tsx src/index.ts 'can you divide 4 by 5'

you should see a slightly different email with the custom template

![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true)

feel free to run with the flow and then you can try updating the template to your liking

(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)

try triggering "request_more_information" as well!


thats it - in the next chapter, we'll build a fully email-driven 
workflow agent that uses webhooks for human approval 


## Chapter XX - HumanLayer Webhook Integration

the previous sections used the humanlayer SDK in "synchronous mode" - that 
means every time we wait for human approval, we sit in a loop 
polling until the human response is received.

That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results. 


add code to initialize humanlayer in the server


```diff
src/server.ts
 import { Thread, agentLoop, handleNextStep } from '../src/agent';
 import { ThreadStore } from '../src/state';
+import { humanlayer } from 'humanlayer';
 
 const app = express();
 const store = new ThreadStore();
 
+const getHumanlayer = () => {
+    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
+    if (!HUMANLAYER_EMAIL) {
+        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+    }
+
+    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
+    if (!HUMANLAYER_API_KEY) {
+        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
+    }
+    return humanlayer({
+        runId: `12fa-agent`,
+        contactChannel: {
+            email: { address: HUMANLAYER_EMAIL }
+        }
+    });
+}
+
 // POST /thread - Start new thread
 app.post('/thread', async (req, res) => {
     
     // loop until stop event
-    const newThread = await agentLoop(thread);
+    const result = await agentLoop(thread);
 
-    store.update(req.params.id, newThread);
+    store.update(req.params.id, result);
 
-    lastEvent = newThread.events[newThread.events.length - 1];
+    lastEvent = result.events[result.events.length - 1];
     lastEvent.data.response_url = `/thread/${req.params.id}/response`;
 
     console.log("returning last event from endpoint", lastEvent);
     
-    res.json(newThread);
+    res.json(result);
 });
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/12-1-server-init.ts src/server.ts

</details>

next, lets update the /thread endpoint to 
  
1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls


Update the server to be able to handle request_more_information responses

- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response


```diff
src/server.ts
-import express from 'express';
+import express, { Request, Response } from 'express';
 import { Thread, agentLoop, handleNextStep } from '../src/agent';
 import { ThreadStore } from '../src/state';
-import { humanlayer } from 'humanlayer';
+import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
 
 const app = express();
     });
 }
-
 // POST /thread - Start new thread
-app.post('/thread', async (req, res) => {
+app.post('/thread', async (req: Request, res: Response) => {
     const thread = new Thread([{
         type: "user_input",
     }]);
     
-    const threadId = store.create(thread);
-    const newThread = await agentLoop(thread);
-    
-    store.update(threadId, newThread);
+    // run agent loop asynchronously, return immediately
+    Promise.resolve().then(async () => {
+        const threadId = store.create(thread);
+        const newThread = await agentLoop(thread);
+        
+        store.update(threadId, newThread);
 
-    const lastEvent = newThread.events[newThread.events.length - 1];
-    // If we exited the loop, include the response URL so the client can
-    // push a new message onto the thread
-    lastEvent.data.response_url = `/thread/${threadId}/response`;
+        const lastEvent = newThread.events[newThread.events.length - 1];
 
-    console.log("returning last event from endpoint", lastEvent);
-
-    res.json({ 
-        thread_id: threadId,
-        ...newThread 
+        if (thread.awaitingHumanResponse()) {
+            const hl = getHumanlayer();
+            // create a human contact - returns immediately
+            hl.createHumanContact({
+                spec: {
+                    msg: lastEvent.data.message,
+                    state: {
+                        thread_id: threadId,
+                    }
+                }
+            });
+        }
     });
+
+    res.json({ status: "processing" });
 });
 
 // GET /thread/:id - Get thread status
-app.get('/thread/:id', (req, res) => {
+app.get('/thread/:id', (req: Request, res: Response) => {
     const thread = store.get(req.params.id);
     if (!thread) {
 });
 
+type WebhookResponse = V1Beta2HumanContactCompleted;
 
-type ApprovalPayload = {
-    type: "approval";
-    approved: boolean;
-    comment?: string;
-}
+const handleHumanResponse = async (req: Request, res: Response) => {
 
-type ResponsePayload = {
-    type: "response";
-    response: string;
 }
 
-type Payload = ApprovalPayload | ResponsePayload;
+app.post('/webhook', async (req: Request, res: Response) => {
+    console.log("webhook response", req.body);
+    const response = req.body as WebhookResponse;
 
-// POST /thread/:id/response - Handle clarification response
-app.post('/thread/:id/response', async (req, res) => {
-    let thread = store.get(req.params.id);
+    // response is guaranteed to be set on a webhook
+    const humanResponse: string = response.event.status?.response as string;
+
+    const threadId = response.event.spec.state?.thread_id;
+    if (!threadId) {
+        return res.status(400).json({ error: "Thread ID not found" });
+    }
+
+    const thread = store.get(threadId);
     if (!thread) {
         return res.status(404).json({ error: "Thread not found" });
     }
 
-    const body: Payload = req.body;
-
-    let lastEvent = thread.events[thread.events.length - 1];
-
-    if (thread.awaitingHumanResponse() && body.type === 'response') {
-        thread.events.push({
-            type: "human_response",
-            data: body.response
-        });
-    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
-        // push feedback onto the thread
-        thread.events.push({
-            type: "tool_response",
-            data: `user denied the operation with feedback: "${body.comment}"`
-        });
-    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
-        // approved, run the tool, pushing results onto the thread
-        await handleNextStep(lastEvent.data, thread);
-    } else {
-        res.status(400).json({
-            error: "Invalid request: " + body.type,
-            awaitingHumanResponse: thread.awaitingHumanResponse(),
-            awaitingHumanApproval: thread.awaitingHumanApproval()
-        });
-        return;
+    if (!thread.awaitingHumanResponse()) {
+        return res.status(400).json({ error: "Thread is not awaiting human response" });
     }
 
-    
-    // loop until stop event
-    const result = await agentLoop(thread);
-
-    store.update(req.params.id, result);
-
-    lastEvent = result.events[result.events.length - 1];
-    lastEvent.data.response_url = `/thread/${req.params.id}/response`;
-
-    console.log("returning last event from endpoint", lastEvent);
-    
-    res.json(result);
 });
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/12a-server.ts src/server.ts

</details>

Start the server in another terminal

    npx tsx src/server.ts

now that the server is running, send a payload to the '/thread' endpoint


__ do the response step

__ now handle approvals for divide

__ now also handle done_for_now


================================================
FILE: workshops/2025-05/walkthrough.yaml
================================================
title: "Building the 12-factor agent template from scratch"
text: "Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology."

targets:
  - markdown: "./build/walkthrough.md"
    onChange:
      diff: true
      cp: true
    newFiles:
      cat: false
      cp: true
  - folders:
      path: "./build/sections"
      skip:
        - "cleanup"
      final:
        dirName: "final"

sections:
  - name: cleanup
    title: "Cleanup"
    text: "Make sure you're starting from a clean slate"
    steps:
      - text: "Clean up existing files"
        command: |
          rm -rf baml_src/ && rm -rf src/

  - name: hello-world
    title: "Chapter 0 - Hello World"
    text: "Let's start with a basic TypeScript setup and a hello world program."
    steps:
      - text: |
          This guide is written in TypeScript (yes, a python version is coming soon)

          There are many checkpoints between every file edit in the workshop steps,
          so even if you aren't super familiar with typescript,
          you should be able to keep up and run each example.

          To run this guide, you'll need a relatively recent version of nodejs and npm installed

          You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine

        command:
            brew install node@20
        results:
          - text: "You should see the node version"
            code: |
              node --version

      - text: "Copy initial package.json"
        file: {src: ./walkthrough/00-package.json, dest: package.json}
      - text: "Install dependencies"
        command: |
          npm install
        incremental: true
      - text: "Copy tsconfig.json"
        file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json}
      - text: "add .gitignore"
        file: {src: ./walkthrough/00-.gitignore, dest: .gitignore}
      - text: "Create src folder"
        dir: {create: true, path: src}
      - text: "Add a simple hello world index.ts"
        file: {src: ./walkthrough/00-index.ts, dest: src/index.ts}
      - text: "Run it to verify"
        command: |
          npx tsx src/index.ts
        results:
          - text: "You should see:"
            code: |
              hello, world!

  - name: cli-and-agent
    title: "Chapter 1 - CLI and Agent Loop"
    text: "Now let's add BAML and create our first agent with a CLI interface."
    steps:
      - text: |
           First, we'll need to install [BAML](https://github.com/boundaryml/baml)
           which is a tool for prompting and structured outputs.
        command: |
          npm install @boundaryml/baml
        incremental: true
      - text: "Initialize BAML"
        command: |
          npx baml-cli init
        incremental: true
      - text: "Remove default resume.baml"
        command: |
          rm baml_src/resume.baml
        incremental: true
      - text: "Add our starter agent, a single baml prompt that we'll build on"
        file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
      - text: "Generate BAML client code"
        command: |
          npx baml-cli generate
        incremental: true
      - text: "Enable BAML logging for this section"
        command: |
          export BAML_LOG=debug
      - text: "Add the CLI interface"
        file: {src: ./walkthrough/01-cli.ts, dest: src/cli.ts}
      - text: "Update index.ts to use the CLI"
        file: {src: ./walkthrough/01-index.ts, dest: src/index.ts}
      - text: "Add the agent implementation"
        file: {src: ./walkthrough/01-agent.ts, dest: src/agent.ts}
      - text: |
          The BAML code is configured to use OPENAI_API_KEY by default

          As you're testing, you can change the model / provider to something else
          as you please

                  client "openai/gpt-4o"

          [Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)

          For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) 
          or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.

          If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key.
          
        command: |
          export OPENAI_API_KEY=...
      - text: "Try it out"
        command: |
          npx tsx src/index.ts hello
        results:
          - text: you should see a familiar response from the model 
            code: |
              {
                intent: 'done_for_now',
                message: 'Hello! How can I assist you today?'
              }

  - name: calculator-tools
    title: "Chapter 2 - Add Calculator Tools"
    text: "Let's add some calculator tools to our agent."
    steps:
      - text: |
          Let's start by adding a tool definition for the calculator

          These are simple structured outputs that we'll ask the model to
          return as a "next step" in the agentic loop.

        file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml}
      - text: |
          Now, let's update the agent's DetermineNextStep method to
          expose the calculator tools as potential next steps

        file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml}
      - text: "Generate updated BAML client"
        command: |
          npx baml-cli generate
        incremental: true
      - text: "Try out the calculator"
        command: |
          npx tsx src/index.ts 'can you add 3 and 4'
        results:
          - text: "You should see a tool call to the calculator"
            code: |
              {
                intent: 'add',
                a: 3,
                b: 4
              }

  - name: tool-loop
    title: "Chapter 3 - Process Tool Calls in a Loop"
    text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
    steps:
      - text: |
          First, lets update the agent to handle the tool call
        file: {src: ./walkthrough/03-agent.ts, dest: src/agent.ts}
      - text: |
          Now, lets try it out
        command: |
          npx tsx src/index.ts 'can you add 3 and 4'
        results:
          - text: you should see the agent call the tool and then return the result
            code: |
              {
                intent: 'done_for_now',
                message: 'The sum of 3 and 4 is 7.'
              }
      - text: "For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output"
        command: |
          export BAML_LOG=off
      - text: "Try a multi-step calculation"
        command: |
          npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'
      - text: "you'll notice that tools like multiply and divide are not available"
        command: |
          npx tsx src/index.ts 'can you multiply 3 and 4'
      - text: |
          next, let's add handlers for the rest of the calculator tools
        file: {src: ./walkthrough/03b-agent.ts, dest: src/agent.ts}
      - text: "Test subtraction"
        command: |
          npx tsx src/index.ts 'can you subtract 3 from 4'
      - text: |
          now, let's test the multiplication tool
        command: |
          npx tsx src/index.ts 'can you multiply 3 and 4'
      - text: |
          finally, let's test a more complex calculation with multiple operations
        command: |
          npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

  - name: baml-tests
    title: "Chapter 4 - Add Tests to agent.baml"
    text: "Let's add some tests to our BAML agent."
    steps:
      - text: to start, leave the baml logs enabled
        command: |
          export BAML_LOG=debug
      - text: |
          next, let's add some tests to the agent

          We'll start with a simple test that checks the agent's ability to handle
          a basic calculation.
          
        file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml}
      - text: "Run the tests"
        command: |
          npx baml-cli test
      - text: |
          now, let's improve the test with assertions!

          Assertions are a great way to make sure the agent is working as expected,
          and can easily be extended to check for more complex behavior.

        file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml}
      - text: "Run the tests"
        command: |
          npx baml-cli test
      - text: |
          as you add more tests, you can disable the logs to keep the output clean. 
          You may want to turn them on as you iterate on specific tests.
        command: |
          export BAML_LOG=off
      - text: |
          now, let's add some more complex test cases,
          where we resume from in the middle of an in-progress
          agentic context window

          
        file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml}
      - text: |
          let's try to run it
        command: |
          npx baml-cli test

  - name: human-tools
    title: "Chapter 5 - Multiple Human Tools"
    text: |
      In this section, we'll add support for multiple tools that serve to 
      contact humans.


    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: |
          first, let's add a tool that can request clarification from a human 

          this will be different from the "done_for_now" tool,
          and can be used to more flexibly handle different types of human interactions
          in your agent.

        file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml}
      - text: |
          next, let's re-generate the client code

          NOTE - if you're using the VSCode extension for BAML,
          the client will be regenerated automatically when you save the file
          in your editor.

        command: |
          npx baml-cli generate
        incremental: true
      - text: |
          now, let's update the agent to use the new tool

        file: {src: ./walkthrough/05-agent.ts, dest: src/agent.ts}
      - text: |
          next, let's update the CLI to handle clarification requests
          by requesting input from the user on the CLI

        file: {src: ./walkthrough/05-cli.ts, dest: src/cli.ts}
      - text: |
          let's try it out

        command: |
          npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '
      - text: |
          next, let's add a test that checks the agent's ability to handle
          a clarification request
          
        file: {src: ./walkthrough/05b-agent.baml, dest: baml_src/agent.baml}
      - text: |
          and now we can run the tests again
        command: |
          npx baml-cli test
      - text: |
          you'll notice the new test passes, but the hello world test fails

          This is because the agent's default behavior is to return "done_for_now"

        file: {src: ./walkthrough/05c-agent.baml, dest: baml_src/agent.baml}
      - text: "Verify tests pass"
        command: |
          npx baml-cli test

  - name: customize-prompt
    title: "Chapter 6 - Customize Your Prompt with Reasoning"
    text: |
      In this section, we'll explore how to customize the prompt of the agent
      with reasoning steps.

      this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)

      there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)
      
    steps:
      - text: "for this section, it will be helpful to leave the baml logs enabled"
        command: |
          export BAML_LOG=debug
      - text: |
          update the agent prompt to include a reasoning step
        file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
      - text: generate the updated client
        command: |
          npx baml-cli generate
        incremental: true
      - text: | 
          now, you can try it out with a simple prompt
        command: |
          npx tsx src/index.ts 'can you multiply 3 and 4'
        results:
          - text: you should see output from the baml logs showing the reasoning steps
      - text: |
           #### optional challenge 

           add a field to your tool output format that includes the reasoning steps in the output!

  - name: context-window
    title: "Chapter 7 - Customize Your Context Window"
    text: |
      In this section, we'll explore how to customize the context window
      of the agent.

      this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)
      
    steps:
      - text: |
          update the agent to pretty-print the Context window for the model
        file: {src: ./walkthrough/07-agent.ts, dest: src/agent.ts}
      - text: "Test the formatting"
        command: |
          BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
      - text: |
          next, let's update the agent to use XML formatting instead 

          this is a very popular format for passing data to a model,

          among other things, because of the token efficiency of XML.

        file: {src: ./walkthrough/07b-agent.ts, dest: src/agent.ts}
      - text: |
          let's try it out
        command: |
          BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
      - text: |
          lets update our tests to match the new output format
        file: {src: ./walkthrough/07c-agent.baml, dest: baml_src/agent.baml}
      - text: |
          check out the updated tests
        command: |
          npx baml-cli test

  - name: api-endpoints
    title: "Chapter 8 - Adding API Endpoints"
    text: "Add an Express server to expose the agent via HTTP."
    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: "Install Express and types"
        command: |
          npm install express && npm install --save-dev @types/express supertest
        incremental: true
      - text: "Add the server implementation"
        file: {src: ./walkthrough/08-server.ts, dest: src/server.ts}
      - text: "Start the server"
        command: |
          npx tsx src/server.ts
      - text: "Test with curl (in another terminal)"
        command: |
          curl -X POST http://localhost:3000/thread \
            -H "Content-Type: application/json" \
            -d '{"message":"can you add 3 and 4"}'
        results:
          - text: |
             You should get an answer from the agent which includes the
             agentic trace, ending in a message like: 

            code: |
              {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}

  - name: state-management
    title: "Chapter 9 - In-Memory State and Async Clarification"
    text: "Add state management and async clarification support."
    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: "Add some simple in-memory state management for threads"
        file: {src: ./walkthrough/09-state.ts, dest: src/state.ts}
      - text: |
          update the server to use the state management

          * Add thread state management using `ThreadStore`
          * return thread IDs and response URLs from the /thread endpoint
          * implement GET /thread/:id 
          * implement POST /thread/:id/response
        file: {src: ./walkthrough/09-server.ts, dest: src/server.ts}
      - text: "Start the server"
        command: |
          npx tsx src/server.ts
      - text: "Test clarification flow"
        command: |
          curl -X POST http://localhost:3000/thread \
            -H "Content-Type: application/json" \
            -d '{"message":"can you multiply 3 and xyz"}'

  - name: human-approval
    title: "Chapter 10 - Adding Human Approval"
    text: "Add support for human approval of operations."
    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: |
          update the server to handle human approvals

          * Import `handleNextStep` to execute approved actions
          * Add two payload types to distinguish approvals from responses
          * Handle responses and approvals differently in the endpoint
          * Show better error messages when things go wrong

        file: {src: ./walkthrough/10-server.ts, dest: src/server.ts}
      - text: "Add a few methods to the agent to handle approvals and responses"
        file: {src: ./walkthrough/10-agent.ts, dest: src/agent.ts}
      - text: "Start the server"
        command: |
          npx tsx src/server.ts
      - text: "Test division with approval"
        command: |
          curl -X POST http://localhost:3000/thread \
            -H "Content-Type: application/json" \
            -d '{"message":"can you divide 3 by 4"}'
        results:
          - text: "You should see:"
            code: |
              {
                "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
                "events": [
                  {
                    "type": "user_input",
                    "data": "can you divide 3 by 4"
                  },
                  {
                    "type": "tool_call",
                    "data": {
                      "intent": "divide",
                      "a": 3,
                      "b": 4,
                      "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
                    }
                  }
                ]
              }
      - text: "reject the request with another curl call, changing the thread ID"
        command: |
          curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
            -H "Content-Type: application/json" \
            -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'
        results:
          - text: 'You should see: the last tool call is now `"intent":"divide","a":3,"b":5`'
            code: |
              {
                "events": [
                  {
                    "type": "user_input",
                    "data": "can you divide 3 by 4"
                  },
                  {
                    "type": "tool_call",
                    "data": {
                      "intent": "divide",
                      "a": 3,
                      "b": 4,
                      "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
                    }
                  },
                  {
                    "type": "tool_response",
                    "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
                  },
                  {
                    "type": "tool_call",
                    "data": {
                      "intent": "divide",
                      "a": 3,
                      "b": 5,
                      "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
                    }
                  }
                ]
              }
      - text: "now you can approve the operation"
        command: |
          curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
            -H "Content-Type: application/json" \
            -d '{"type": "approval", "approved": true}'
        results:
          - text: "you should see the final message includes the tool response and final result!"
            code: |
              ...
              {
                "type": "tool_response",
                "data": 0.5
              },
              {
                "type": "done_for_now",
                "message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!",
                "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
              }

  - name: humanlayer-approval
    title: "Chapter 11 - Human Approvals over email"
    text: |
      in this section, we'll add support for human approvals over email.

      This will start a little bit contrived, just to get the concepts down - 

      We'll start by invoking the workflow from the CLI but approvals for `divide`
      and `request_more_information` will be handled over email,
      then the final `done_for_now` answer will be printed back to the CLI

      While contrived, this is a great example of the flexibility you get from
      [factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)

    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: "Install HumanLayer"
        command: |
          npm install humanlayer
        incremental: true
      - text: "Update CLI to send `divide` and `request_more_information` to a human via email"
        file: {src: ./walkthrough/11-cli.ts, dest: src/cli.ts}
      - text: "Run the CLI"
        command: |
          npx tsx src/index.ts 'can you divide 4 by 5'
        results:
          - text: "The last line of your program should mention human review step"
            code: |
              nextStep { intent: 'divide', a: 4, b: 5 }
              HumanLayer: Requested human approval from HumanLayer cloud
      - text: |
          go ahead and respond to the email with some feedback:

          ![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true)
      - text: |
          you should get another email with an updated attempt based on your feedback!

          You can go ahead and approve this one:

          ![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true)
        results:
          - text: and your final output will look like
            code: |
              nextStep {
               intent: 'done_for_now',
               message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
              }
              The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!
      - text: |
          let's implement the `request_more_information` flow as well
        file: {src: ./walkthrough/11b-cli.ts, dest: src/cli.ts}
      - text: |
          let's test the `request_more_information` flow by asking for a calculation
          with garbled input:
        command: |
          npx tsx src/index.ts 'can you multiply 4 and xyz'
      - text: "You should get an email with a request for clarification"
        command: |
          Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?
      - text: you can respond with something like
        command: |
          use 8 instead of xyz
        results:
          - text: you should see a final result on the CLI like
            code: |
              I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.
      - text: |
          as a final step, lets explore using a custom html template for the email
        file: {src: ./walkthrough/11c-cli.ts, dest: src/cli.ts}
      - text: |
          first try with divide:
        command: |
          npx tsx src/index.ts 'can you divide 4 by 5'
        results: 
          - text: |
              you should see a slightly different email with the custom template

              ![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true)

              feel free to run with the flow and then you can try updating the template to your liking

              (if you're using cursor, something as simple as highlighting the template and asking to "make it better"
              should do the trick)

              try triggering "request_more_information" as well!
      - text: |
          that's it - in the next chapter, we'll build a fully email-driven 
          workflow agent that uses webhooks for human approval 

  - name: humanlayer-webhook
    title: "Chapter XX - HumanLayer Webhook Integration"
    text: |
      the previous sections used the humanlayer SDK in "synchronous mode" - that 
      means every time we wait for human approval, we sit in a loop 
      polling until the human response is received.

      That's obviously not ideal, especially for production workloads,
      so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
      by updating the server to end processing after contacting a human, and use webhooks to receive the results. 

    steps:
      - text: |
          add code to initialize humanlayer in the server
        file: {src: ./walkthrough/12-1-server-init.ts, dest: src/server.ts}
      - text: |
          next, lets update the /thread endpoint to 
            
          1. handle requests asynchronously, returning immediately
          2. create a human contact on request_more_information and done_for_now calls

        # file: {src: }
      - text: |
          Update the server to be able to handle request_clarification responses

          - remove the old /response endpoint and types
          - update the /thread endpoint to run processing asynchronously, return immediately
          - send a state.threadId when requesting human responses
          - add a handleHumanResponse function to process the human response
          - add a /webhook endpoint to handle the webhook response

        file: {src: ./walkthrough/12a-server.ts, dest: src/server.ts}
      - text: "Start the server in another terminal"
        command: |
          npx tsx src/server.ts
      - text: |
          now that the server is running, send a payload to the '/thread' endpoint
      - text: __ do the response step
      - text: __ now handle approvals for divide
      - text: __ now also handle done_for_now
        

================================================
FILE: workshops/2025-05-17/.gitignore
================================================
baml_src/*.baml
src/*.ts
package.json
package-lock.json
tsconfig.json
build/


================================================
FILE: workshops/2025-05-17/sections/00-hello-world/README.md
================================================
# Chapter 0 - Hello World

Let's start with a basic TypeScript setup and a hello world program.

This guide is written in TypeScript (yes, a python version is coming soon)

There are many checkpoints between every file edit in the workshop steps, 
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.

To run this guide, you'll need a relatively recent version of nodejs and npm installed

You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine


    brew install node@20

You should see the node version

    node --version

Copy initial package.json

    cp ./walkthrough/00-package.json package.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-package.json
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
```

</details>

Install dependencies

    npm install

Copy tsconfig.json

    cp ./walkthrough/00-tsconfig.json tsconfig.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-tsconfig.json
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
```

</details>

add .gitignore

    cp ./walkthrough/00-.gitignore .gitignore

<details>
<summary>show file</summary>

```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```

</details>

Create src folder

    mkdir -p src

Add a simple hello world index.ts

    cp ./walkthrough/00-index.ts src/index.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/00-index.ts
async function hello(): Promise<void> {
    console.log('hello, world!')
}

async function main() {
    await hello()
}

main().catch(console.error)
```

</details>

Run it to verify

    npx tsx src/index.ts

You should see:

    hello, world!


================================================
FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-index.ts
================================================
/**
 * Prints the hello-world greeting to stdout.
 */
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

/**
 * Program entry point: runs the hello-world greeting and resolves once it is done.
 */
async function main(): Promise<void> {
    await hello()
}

// Kick off the program. If main() rejects, log the error and mark the process
// as failed so shells / CI see a non-zero exit status instead of a silent 0.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})

================================================
FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
  

================================================
FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/README.md
================================================
# Chapter 1 - CLI and Agent Loop

Now let's add BAML and create our first agent with a CLI interface.

First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.


    npm install @boundaryml/baml

Initialize BAML

    npx baml-cli init

Remove default resume.baml

    rm baml_src/resume.baml

Add our starter agent, a single baml prompt that we'll build on

    cp ./walkthrough/01-agent.baml baml_src/agent.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/01-agent.baml
class DoneForNow {
  intent "done_for_now"
  message string 
}

client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink 

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
```

</details>

Generate BAML client code

    npx baml-cli generate

Enable BAML logging for this section

    export BAML_LOG=debug

Add the CLI interface

    cp ./walkthrough/01-cli.ts src/cli.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}
```

</details>

Update index.ts to use the CLI

```diff
src/index.ts
+import { cli } from "./cli"
+
 async function hello(): Promise<void> {
     console.log('hello, world!')
 
 async function main() {
-    await hello()
+    await cli()
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/01-index.ts src/index.ts

</details>

Add the agent implementation

    cp ./walkthrough/01-agent.ts src/agent.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
    type: string
    data: any;
}

export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        return JSON.stringify(this.events);
    }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    return nextStep;
}
```

</details>

The BAML code is configured to use BASETEN_API_KEY by default

To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co),
and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/).

```rust 
  function DetermineNextStep(thread: string) -> DoneForNow {
      client Qwen3
      // ...
```

If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key.

If you want to try swapping out the model, you can change the `client` line.

[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)

For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) 
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.

For example, to use openai with an OPENAI_API_KEY, you can do:

    client "openai/gpt-4o"


Set your env vars

    export BASETEN_API_KEY=...
    export BASETEN_BASE_URL=...

Try it out

    npx tsx src/index.ts hello

you should see a familiar response from the model

    {
      intent: 'done_for_now',
      message: 'Hello! How can I assist you today?'
    }


================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/src/index.ts
================================================
/**
 * Prints the hello-world greeting to stdout.
 */
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

/**
 * Program entry point: runs the hello-world greeting and resolves once it is done.
 */
async function main(): Promise<void> {
    await hello()
}

// Kick off the program. If main() rejects, log the error and mark the process
// as failed so shells / CI see a non-zero exit status instead of a silent 0.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})

================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-agent.baml
================================================
// Structured output for a plain reply: `intent` is the fixed literal
// "done_for_now" and `message` carries the text of the reply.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// LLM client named Qwen3, configured with the "openai-generic" provider.
// The endpoint and credentials are read from the BASETEN_BASE_URL and
// BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Single LLM call that decides what the agent does next.
// `thread` is the serialized event history (the caller passes
// Thread.serializeForLLM()); the reply is parsed into DoneForNow.
function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink 

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Test case: runs DetermineNextStep against a thread that contains a single
// "hello!" user_input event.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-agent.ts
================================================
import { b } from "../baml_client";

// Resolved return type of b.DetermineNextStep, i.e. whatever "next step" the
// BAML function can produce (a tool call or a respond-to-human step).
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// A single entry in a Thread's event history.
export interface Event {
    // label for the kind of event, e.g. "user_input" (see cli.ts)
    type: string
    // payload of the event; its shape is not constrained here
    data: any;
}

/**
 * Holds the ordered list of events that make up one agent conversation.
 */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Produces the string form of the thread that is handed to the LLM.
     *
     * Currently plain JSON of the events array; swap in whatever custom
     * serialization you want (XML, etc), e.g.
     * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM(): string {
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

/**
 * Runs the agent against the given thread.
 *
 * For now this is a single LLM turn: the thread is serialized, passed to
 * DetermineNextStep, and the chosen next step is returned as-is. This function
 * is where the rest of the agent logic will be added.
 */
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const serializedThread = thread.serializeForLLM();
    return await b.DetermineNextStep(serializedThread);
}


================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

/**
 * Command-line entry point for the agent.
 *
 * Joins every command line argument into one message, starts a thread with
 * that message as a "user_input" event, runs the agent loop and prints the
 * result. Exits with status 1 when no message was supplied.
 */
export async function cli() {
    // argv[0] is node and argv[1] is the script name, so the message starts at index 2
    const [, , ...words] = process.argv;

    if (words.length < 1) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the user's message (all arguments joined by spaces) becomes the thread's first event
    const userInput = words.join(" ");
    const thread = new Thread([{ type: "user_input", data: userInput }]);

    const output = await agentLoop(thread);
    console.log(output);
}


================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-index.ts
================================================
import { cli } from "./cli"

/**
 * Prints the hello-world greeting to stdout.
 */
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

/**
 * Program entry point: hands control to the command line interface.
 */
async function main(): Promise<void> {
    await cli()
}

// Kick off the program. If main() rejects, log the error and mark the process
// as failed so shells / CI see a non-zero exit status instead of a silent 0.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})

================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/README.md
================================================
# Chapter 2 - Add Calculator Tools

Let's add some calculator tools to our agent.

Let's start by adding a tool definition for the calculator

These are simple structured outputs that we'll ask the model to 
return as a "next step" in the agentic loop.


    cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/02-tool_calculator.baml
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


class AddTool {
    intent "add"
    a int | float
    b int | float
}

class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

class DivideTool {
    intent "divide"
    a int | float
    b int | float
}
```

</details>

Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps


```diff
baml_src/agent.baml
 function DetermineNextStep(
     thread: string 
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
     client Qwen3
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/02-agent.baml baml_src/agent.baml

</details>

Generate updated BAML client

    npx baml-cli generate

Try out the calculator

    npx tsx src/index.ts 'can you add 3 and 4'

You should see a tool call to the calculator

    {
      intent: 'add',
      a: 3,
      b: 4
    }


================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/baml_src/agent.baml
================================================
// Structured output for a plain reply: `intent` is the fixed literal
// "done_for_now" and `message` carries the text of the reply.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// LLM client named Qwen3, configured with the "openai-generic" provider.
// The endpoint and credentials are read from the BASETEN_BASE_URL and
// BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Single LLM call that decides what the agent does next.
// `thread` is the serialized event history (the caller passes
// Thread.serializeForLLM()); the reply is parsed into DoneForNow.
function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink 

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Test case: runs DetermineNextStep against a thread that contains a single
// "hello!" user_input event.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client for the "gpt-4o" model; the key is read from OPENAI_API_KEY.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client for the "gpt-4o-mini" model, using the Exponential retry
// policy defined in this file; the key is read from OPENAI_API_KEY.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client for the "claude-3-5-sonnet-20241022" model; the key is
// read from ANTHROPIC_API_KEY.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client for the "claude-3-haiku-20240307" model, using the Constant
// retry policy defined in this file; the key is read from ANTHROPIC_API_KEY.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two small/fast clients.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that falls back from one OpenAI client to another.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds.
    // The second entry must be a different client than the first: listing the
    // same client twice only repeats the call that just failed.
    strategy [CustomGPT4oMini, CustomGPT4o]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy: up to 3 retries with a fixed 200 ms delay setting.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy: up to 2 retries with exponential backoff, configured with a
// 300 ms delay, a 1.5 multiplier and a 10000 ms maximum delay.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.88.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/src/agent.ts
================================================
import { b } from "../baml_client";

// Resolved return type of b.DetermineNextStep, i.e. whatever "next step" the
// BAML function can produce (a tool call or a respond-to-human step).
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// A single entry in a Thread's event history.
export interface Event {
    // label for the kind of event, e.g. "user_input" (see cli.ts)
    type: string
    // payload of the event; its shape is not constrained here
    data: any;
}

/**
 * Holds the ordered list of events that make up one agent conversation.
 */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Produces the string form of the thread that is handed to the LLM.
     *
     * Currently plain JSON of the events array; swap in whatever custom
     * serialization you want (XML, etc), e.g.
     * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM(): string {
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

/**
 * Runs the agent against the given thread.
 *
 * For now this is a single LLM turn: the thread is serialized, passed to
 * DetermineNextStep, and the chosen next step is returned as-is. This function
 * is where the rest of the agent logic will be added.
 */
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const serializedThread = thread.serializeForLLM();
    return await b.DetermineNextStep(serializedThread);
}


================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

/**
 * Command-line entry point for the agent.
 *
 * Joins every command line argument into one message, starts a thread with
 * that message as a "user_input" event, runs the agent loop and prints the
 * result. Exits with status 1 when no message was supplied.
 */
export async function cli() {
    // argv[0] is node and argv[1] is the script name, so the message starts at index 2
    const [, , ...words] = process.argv;

    if (words.length < 1) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the user's message (all arguments joined by spaces) becomes the thread's first event
    const userInput = words.join(" ");
    const thread = new Thread([{ type: "user_input", data: userInput }]);

    const output = await agentLoop(thread);
    console.log(output);
}


================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/src/index.ts
================================================
import { cli } from "./cli"

/**
 * Prints the hello-world greeting to stdout.
 */
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

/**
 * Program entry point: hands control to the command line interface.
 */
async function main(): Promise<void> {
    await cli()
}

// Kick off the program. If main() rejects, log the error and mark the process
// as failed so shells / CI see a non-zero exit status instead of a silent 0.
main().catch((err) => {
    console.error(err)
    process.exitCode = 1
})

================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/walkthrough/02-agent.baml
================================================
// Structured output for a plain reply: `intent` is the fixed literal
// "done_for_now" and `message` carries the text of the reply.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// LLM client named Qwen3, configured with the "openai-generic" provider.
// The endpoint and credentials are read from the BASETEN_BASE_URL and
// BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Single LLM call that decides what the agent does next.
// `thread` is the serialized event history (the caller passes
// Thread.serializeForLLM()); the reply is parsed into either one of the
// CalculatorTools or DoneForNow.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink 

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Test case: runs DetermineNextStep against a thread that contains a single
// "hello!" user_input event.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/walkthrough/02-tool_calculator.baml
================================================
// Union of all calculator tool calls the model can return as a next step.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Tool call for addition: `intent` is the fixed literal "add";
// `a` and `b` are the two numeric operands.
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tool call for subtraction: `intent` is the fixed literal "subtract";
// `a` and `b` are the two numeric operands.
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tool call for multiplication: `intent` is the fixed literal "multiply";
// `a` and `b` are the two numeric operands.
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tool call for division: `intent` is the fixed literal "divide";
// `a` and `b` are the two numeric operands.
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/README.md
================================================
# Chapter 3 - Process Tool Calls in a Loop

Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.

First, let's update the agent to handle the tool call


```diff
src/agent.ts
 }
 
-// right now this just runs one turn with the LLM, but
-// we'll update this function to handle all the agent logic
-export async function agentLoop(thread: Thread): Promise<AgentResponse> {
-    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
-    return nextStep;
+
+
+export async function agentLoop(thread: Thread): Promise<string> {
+
+    while (true) {
+        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
+        console.log("nextStep", nextStep);
+
+        switch (nextStep.intent) {
+            case "done_for_now":
+                // response to human, return the next step object
+                return nextStep.message;
+            case "add":
+                thread.events.push({
+                    "type": "tool_call",
+                    "data": nextStep
+                });
+                const result = nextStep.a + nextStep.b;
+                console.log("tool_response", result);
+                thread.events.push({
+                    "type": "tool_response",
+                    "data": result
+                });
+                continue;
+            default:
+                throw new Error(`Unknown intent: ${nextStep.intent}`);
+        }
+    }
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03-agent.ts src/agent.ts

</details>

Now, let's try it out


    npx tsx src/index.ts 'can you add 3 and 4'

you should see the agent call the tool and then return the result

    {
      intent: 'done_for_now',
      message: 'The sum of 3 and 4 is 7.'
    }

For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output

    export BAML_LOG=off

Try a multi-step calculation

    npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'

you'll notice that tools like multiply and divide are not available

    npx tsx src/index.ts 'can you multiply 3 and 4'

next, let's add handlers for the rest of the calculator tools


```diff
src/agent.ts
-import { b } from "../baml_client";
+import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
 
-// tool call or a respond to human tool
-type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
-
 export interface Event {
     type: string
 }
 
+export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
 
+export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
+    let result: number;
+    switch (nextStep.intent) {
+        case "add":
+            result = nextStep.a + nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "subtract":
+            result = nextStep.a - nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "multiply":
+            result = nextStep.a * nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "divide":
+            result = nextStep.a / nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+    }
+}
 
 export async function agentLoop(thread: Thread): Promise<string> {
         console.log("nextStep", nextStep);
 
+        thread.events.push({
+            "type": "tool_call",
+            "data": nextStep
+        });
+
         switch (nextStep.intent) {
             case "done_for_now":
                 return nextStep.message;
             case "add":
-                thread.events.push({
-                    "type": "tool_call",
-                    "data": nextStep
-                });
-                const result = nextStep.a + nextStep.b;
-                console.log("tool_response", result);
-                thread.events.push({
-                    "type": "tool_response",
-                    "data": result
-                });
-                continue;
-            default:
-                throw new Error(`Unknown intent: ${nextStep.intent}`);
+            case "subtract":
+            case "multiply":
+            case "divide":
+                thread = await handleNextStep(nextStep, thread);
         }
     }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03b-agent.ts src/agent.ts

</details>

Test subtraction

    npx tsx src/index.ts 'can you subtract 3 from 4'

now, let's test the multiplication tool


    npx tsx src/index.ts 'can you multiply 3 and 4'

finally, let's test a more complex calculation with multiple operations


    npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

congratulations, you've taken your first step into hand-rolling an agent loop.

from here, we're going to start incorporating some more intermediate and advanced
concepts for 12-factor agents.


================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/baml_src/agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// single free-text `message`.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// LLM client using the "openai-generic" provider; the endpoint URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Asks the Qwen3 client what to do next for the given thread (passed in as an
// already-serialized string). The reply is typed as CalculatorTools | DoneForNow;
// `ctx.output_format` is rendered at the end of the user message.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client Qwen3

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink 

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Smoke test: runs DetermineNextStep on a single user_input event ("hello!").
// No assertions are attached.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// OpenAI client for model "gpt-4o"; key comes from OPENAI_API_KEY. No retry policy.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI client for model "gpt-4o-mini"; key comes from OPENAI_API_KEY.
// Uses the Exponential retry policy declared in this file.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic client for model "claude-3-5-sonnet-20241022"; key comes from
// ANTHROPIC_API_KEY. No retry policy.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}


// Anthropic client for model "claude-3-haiku-20240307"; key comes from
// ANTHROPIC_API_KEY. Uses the Constant retry policy declared in this file.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that rotates requests between CustomGPT4oMini and CustomHaiku.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Fallback chain over the OpenAI clients declared in this file: try the
// gpt-4o-mini client first and, if it fails, fall back to the gpt-4o client.
// (Listing the same client twice would never fall back to a different model.)
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4o]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy: up to 3 retries with a fixed 200 ms delay between attempts.
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

// Retry policy: up to 2 retries with exponential backoff, starting at 300 ms,
// multiplied by 1.5 each time and capped at 10000 ms.
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/baml_src/generators.baml
================================================
// This helps you auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
// Generator config: emits an async TypeScript client into the directory above
// baml_src/ (output_dir "../"), pinned to BAML version 0.88.0.
generator target {
    // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
    output_type "typescript"

    // Where the generated code will be saved (relative to baml_src/)
    output_dir "../"

    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
    // The BAML VSCode extension version should also match this version.
    version "0.88.0"

    // Valid values: "sync", "async"
    // This controls what `b.FunctionName()` will be (sync or async).
    default_client_mode async
}


================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tool variants the model may choose from.
// Each variant is tagged by a literal `intent` and carries two numeric
// operands, `a` and `b`, each typed int | float.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// intent "add"
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// intent "subtract"
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// intent "multiply"
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// intent "divide"
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/src/agent.ts
================================================
import { b } from "../baml_client";

// either a tool call or a respond-to-human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// One entry in a Thread's event history.
export interface Event {
    // label for the kind of event (free-form string)
    type: string
    // payload for the event; intentionally untyped
    data: any;
}

// Ordered event history for one agent conversation.
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // the caller's array is stored by reference, not copied
        this.events = events;
    }

    // Render the event history as the string that is handed to the LLM.
    serializeForLLM(): string {
        // plain JSON for now; swap in any custom serialization (XML, etc.) here
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

// Single-turn version of the agent: serialize the thread, ask the LLM for the
// next step, and hand that step straight back to the caller.
// (this function will grow to hold all the agent logic)
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const serializedThread = thread.serializeForLLM();
    return await b.DetermineNextStep(serializedThread);
}


================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

// Command-line entry: turn the arguments into one user message, run the agent
// loop on a fresh thread, and print whatever the loop returns.
export async function cli() {
    // drop the first two argv entries (node and script name)
    const userArgs = process.argv.slice(2);

    if (userArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // all arguments are joined with spaces into the initial user_input event
    const initialEvent = { type: "user_input", data: userArgs.join(" ") };
    const thread = new Thread([initialEvent]);

    const result = await agentLoop(thread);
    console.log(result);
}


================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/src/index.ts
================================================
import { cli } from "./cli"

// Prints a fixed greeting.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Program entry point: run the CLI; any rejection is passed to console.error.
async function main(): Promise<void> {
    await cli()
}

main().catch((err) => console.error(err))

================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/walkthrough/03-agent.ts
================================================
import { b } from "../baml_client";

// either a tool call or a respond-to-human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// One entry in a Thread's event history.
export interface Event {
    // label for the kind of event (this file pushes "tool_call" and "tool_response")
    type: string
    // payload for the event; intentionally untyped
    data: any;
}

// Ordered event history for one agent conversation.
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // the caller's array is stored by reference, not copied
        this.events = events;
    }

    // Render the event history as the string that is handed to the LLM.
    serializeForLLM(): string {
        // plain JSON for now; swap in any custom serialization (XML, etc.) here
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}


// Repeatedly asks the LLM for the next step until it answers "done_for_now",
// then returns that step's message. Only the "add" tool is executed here;
// any other intent raises an error.
export async function agentLoop(thread: Thread): Promise<string> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const nextStep = await b.DetermineNextStep(serialized);
        console.log("nextStep", nextStep);

        if (nextStep.intent === "done_for_now") {
            // response to human: hand back the message text
            return nextStep.message;
        }
        if (nextStep.intent !== "add") {
            throw new Error(`Unknown intent: ${nextStep.intent}`);
        }

        // record the call, run it, record the outcome, then ask the LLM again
        thread.events.push({ type: "tool_call", data: nextStep });
        const sum = nextStep.a + nextStep.b;
        console.log("tool_response", sum);
        thread.events.push({ type: "tool_response", data: sum });
    }
}


================================================
FILE: workshops/2025-05-17/sections/03-tool-loop/walkthrough/03b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a Thread's event history.
export interface Event {
    // label for the kind of event (this file pushes "tool_call" and "tool_response")
    type: string
    // payload for the event; intentionally untyped
    data: any;
}

// Ordered event history for one agent conversation.
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // the caller's array is stored by reference, not copied
        this.events = events;
    }

    // Render the event history as the string that is handed to the LLM.
    serializeForLLM(): string {
        // plain JSON for now; swap in any custom serialization (XML, etc.) here
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

// Executes one calculator tool call and appends its numeric outcome to the
// thread as a "tool_response" event. Returns the same thread object it was given.
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // shared epilogue for every operation: log, record, hand the thread back
    const respond = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ type: "tool_response", data: value });
        return thread;
    };

    const { a, b } = nextStep;
    switch (nextStep.intent) {
        case "add":
            return respond(a + b);
        case "subtract":
            return respond(a - b);
        case "multiply":
            return respond(a * b);
        case "divide":
            return respond(a / b);
    }
}

// Runs the agent until the LLM answers "done_for_now" and returns that step's
// message. Every step the LLM picks (including the final one) is appended to
// the thread as a "tool_call" event before it is dispatched; calculator steps
// are executed by handleNextStep, which appends the matching "tool_response".
// Throws if the LLM returns an intent this loop has no handler for.
export async function agentLoop(thread: Thread): Promise<string> {

    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
                // response to human, return the message text
                return nextStep.message;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
                break;
            default: {
                // Exhaustiveness guard: without it, an intent with no handler
                // would append a tool_call and re-prompt the LLM forever.
                // The `never` annotation also makes the type checker flag any
                // new variant added to DetermineNextStep's return type.
                const unhandled: never = nextStep;
                throw new Error(`Unknown intent: ${(unhandled as { intent: string }).intent}`);
            }
        }
    }
}


================================================
FILE: workshops/2025-05-17/walkthrough/00-.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-05-17/walkthrough/00-index.ts
================================================
// Prints a fixed greeting.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Program entry point: run the greeting; any rejection is passed to console.error.
async function main(): Promise<void> {
    await hello()
}

main().catch((err) => console.error(err))

================================================
FILE: workshops/2025-05-17/walkthrough/00-package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
  

================================================
FILE: workshops/2025-05-17/walkthrough/00-tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-05-17/walkthrough/01-agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// single free-text `message`.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// LLM client using the "openai-generic" provider; the endpoint URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Asks the Qwen3 client what to do next for the given thread (passed in as an
// already-serialized string). At this stage the only possible result is DoneForNow;
// `ctx.output_format` is rendered at the end of the user message.
function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    client Qwen3
    // client "openai/gpt-4o"

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink 

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Smoke test: runs DetermineNextStep on a single user_input event ("hello!").
// No assertions are attached.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05-17/walkthrough/01-agent.ts
================================================
import { b } from "../baml_client";

// either a tool call or a respond-to-human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// One entry in a Thread's event history.
export interface Event {
    // label for the kind of event (free-form string)
    type: string
    // payload for the event; intentionally untyped
    data: any;
}

// Ordered event history for one agent conversation.
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // the caller's array is stored by reference, not copied
        this.events = events;
    }

    // Render the event history as the string that is handed to the LLM.
    serializeForLLM(): string {
        // plain JSON for now; swap in any custom serialization (XML, etc.) here
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

// Single-turn version of the agent: serialize the thread, ask the LLM for the
// next step, and hand that step straight back to the caller.
// (this function will grow to hold all the agent logic)
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const serializedThread = thread.serializeForLLM();
    return await b.DetermineNextStep(serializedThread);
}


================================================
FILE: workshops/2025-05-17/walkthrough/01-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

// Command-line entry: turn the arguments into one user message, run the agent
// loop on a fresh thread, and print whatever the loop returns.
export async function cli() {
    // drop the first two argv entries (node and script name)
    const userArgs = process.argv.slice(2);

    if (userArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // all arguments are joined with spaces into the initial user_input event
    const initialEvent = { type: "user_input", data: userArgs.join(" ") };
    const thread = new Thread([initialEvent]);

    const result = await agentLoop(thread);
    console.log(result);
}


================================================
FILE: workshops/2025-05-17/walkthrough/01-index.ts
================================================
import { cli } from "./cli"

// Prints a fixed greeting.
async function hello(): Promise<void> {
    const greeting = 'hello, world!'
    console.log(greeting)
}

// Program entry point: run the CLI; any rejection is passed to console.error.
async function main(): Promise<void> {
    await cli()
}

main().catch((err) => console.error(err))

================================================
FILE: workshops/2025-05-17/walkthrough/02-agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// single free-text `message`.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// LLM client using the "openai-generic" provider; the endpoint URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Asks the Qwen3 client what to do next for the given thread (passed in as an
// already-serialized string). The reply is typed as CalculatorTools | DoneForNow;
// `ctx.output_format` is rendered at the end of the user message.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client Qwen3

    // client "openai/gpt-4o"

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink 

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Smoke test: runs DetermineNextStep on a single user_input event ("hello!").
// No assertions are attached.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-05-17/walkthrough/02-tool_calculator.baml
================================================
// Union of the four arithmetic tool variants the model may choose from.
// Each variant is tagged by a literal `intent` and carries two numeric
// operands, `a` and `b`, each typed int | float.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// intent "add"
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// intent "subtract"
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// intent "multiply"
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// intent "divide"
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-05-17/walkthrough/03-agent.ts
================================================
import { b } from "../baml_client";

// either a tool call or a respond-to-human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

// One entry in a Thread's event history.
export interface Event {
    // label for the kind of event (this file pushes "tool_call" and "tool_response")
    type: string
    // payload for the event; intentionally untyped
    data: any;
}

// Ordered event history for one agent conversation.
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // the caller's array is stored by reference, not copied
        this.events = events;
    }

    // Render the event history as the string that is handed to the LLM.
    serializeForLLM(): string {
        // plain JSON for now; swap in any custom serialization (XML, etc.) here
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}


// Repeatedly asks the LLM for the next step until it answers "done_for_now",
// then returns that step's message. Only the "add" tool is executed here;
// any other intent raises an error.
export async function agentLoop(thread: Thread): Promise<string> {
    for (;;) {
        const serialized = thread.serializeForLLM();
        const nextStep = await b.DetermineNextStep(serialized);
        console.log("nextStep", nextStep);

        if (nextStep.intent === "done_for_now") {
            // response to human: hand back the message text
            return nextStep.message;
        }
        if (nextStep.intent !== "add") {
            throw new Error(`Unknown intent: ${nextStep.intent}`);
        }

        // record the call, run it, record the outcome, then ask the LLM again
        thread.events.push({ type: "tool_call", data: nextStep });
        const sum = nextStep.a + nextStep.b;
        console.log("tool_response", sum);
        thread.events.push({ type: "tool_response", data: sum });
    }
}


================================================
FILE: workshops/2025-05-17/walkthrough/03b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a Thread's event history.
export interface Event {
    // label for the kind of event (this file pushes "tool_call" and "tool_response")
    type: string
    // payload for the event; intentionally untyped
    data: any;
}

// Ordered event history for one agent conversation.
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // the caller's array is stored by reference, not copied
        this.events = events;
    }

    // Render the event history as the string that is handed to the LLM.
    serializeForLLM(): string {
        // plain JSON for now; swap in any custom serialization (XML, etc.) here
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

// Executes one calculator tool call and appends its numeric outcome to the
// thread as a "tool_response" event. Returns the same thread object it was given.
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // shared epilogue for every operation: log, record, hand the thread back
    const respond = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ type: "tool_response", data: value });
        return thread;
    };

    const { a, b } = nextStep;
    switch (nextStep.intent) {
        case "add":
            return respond(a + b);
        case "subtract":
            return respond(a - b);
        case "multiply":
            return respond(a * b);
        case "divide":
            return respond(a / b);
    }
}

// Runs the agent until the LLM answers "done_for_now" and returns that step's
// message. Every step the LLM picks (including the final one) is appended to
// the thread as a "tool_call" event before it is dispatched; calculator steps
// are executed by handleNextStep, which appends the matching "tool_response".
// Throws if the LLM returns an intent this loop has no handler for.
export async function agentLoop(thread: Thread): Promise<string> {

    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
                // response to human, return the message text
                return nextStep.message;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
                break;
            default: {
                // Exhaustiveness guard: without it, an intent with no handler
                // would append a tool_call and re-prompt the LLM forever.
                // The `never` annotation also makes the type checker flag any
                // new variant added to DetermineNextStep's return type.
                const unhandled: never = nextStep;
                throw new Error(`Unknown intent: ${(unhandled as { intent: string }).intent}`);
            }
        }
    }
}


================================================
FILE: workshops/2025-05-17/walkthrough/04-agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// single free-text `message`.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// LLM client using the "openai-generic" provider; the endpoint URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Asks the Qwen3 client what to do next for the given thread (passed in as an
// already-serialized string). The reply is typed as CalculatorTools | DoneForNow;
// `ctx.output_format` is rendered at the end of the user message.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client Qwen3
    // client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Smoke test: runs DetermineNextStep on a single user_input event ("hello!").
// No assertions are attached.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

// Runs DetermineNextStep on a single user_input event asking to multiply 3 and 4.
// No assertions are attached.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
}


================================================
FILE: workshops/2025-05-17/walkthrough/04b-agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// single free-text `message`.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// LLM client using the "openai-generic" provider; the endpoint URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Asks the Qwen3 client what to do next for the given thread (passed in as an
// already-serialized string). The reply is typed as CalculatorTools | DoneForNow;
// `ctx.output_format` is rendered at the end of the user message.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client Qwen3
    // client "openai/gpt-4o" 

    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single "hello!" user_input event and asserts
// (under the label `hello`) that the returned intent is "done_for_now".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(hello, {{this.intent == "done_for_now"}})
}

// Runs DetermineNextStep on a request to multiply 3 and 4 and asserts (under
// the label `math_operation`) that the returned intent is "multiply".
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(math_operation, {{this.intent == "multiply"}})
}


================================================
FILE: workshops/2025-05-17/walkthrough/04c-agent.baml
================================================
// Output variant tagged with the literal intent "done_for_now"; carries a
// single free-text `message`.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// LLM client using the "openai-generic" provider; the endpoint URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}
// Asks the Qwen3 client what to do next for the given thread (passed in as an
// already-serialized string). The reply is typed as CalculatorTools | DoneForNow;
// `ctx.output_format` is rendered at the end of the user message.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client Qwen3

    // client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single "hello!" user_input event and asserts
// that the returned intent is "done_for_now".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

// Runs DetermineNextStep on a request to multiply 3 and 4 and asserts that
// the returned intent is "multiply".
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Replays an already-completed multiply -> divide -> add thread (final
// tool_response 18) and asserts that the model stops with "done_for_now" and
// that its message contains "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


================================================
FILE: workshops/2025-05-17/walkthrough/05-agent.baml
================================================
// human tools are async requests to a human: either a request for
// clarification or the final done_for_now message
type HumanTools = ClarificationRequest | DoneForNow

// Output variant tagged with the literal intent "request_more_information";
// `message` carries the text of the request. The @description on the intent is
// attached to the schema for this field.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Output variant tagged with the literal intent "done_for_now"; `message` is
// described to the model as the summary sent to the user about the work done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// LLM client using the "openai-generic" provider; the endpoint URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Asks the Qwen3 client what to do next for the given thread (passed in as an
// already-serialized string). The reply is typed as HumanTools | CalculatorTools;
// `ctx.output_format` is rendered at the end of the user message.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client Qwen3

    // client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Runs DetermineNextStep on a single "hello!" user_input event and asserts
// that the returned intent is "done_for_now".
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

// Runs DetermineNextStep on a request to multiply 3 and 4 and asserts that
// the returned intent is "multiply".
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Replays an already-completed multiply -> divide -> add thread (final
// tool_response 18) and asserts that the model stops with "done_for_now" and
// that its message contains "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


================================================
FILE: workshops/2025-05-17/walkthrough/05-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a Thread's event history.
export interface Event {
    // label for the kind of event (this file pushes "tool_call" and "tool_response")
    type: string
    // payload for the event; intentionally untyped
    data: any;
}

// Ordered event history for one agent conversation.
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // the caller's array is stored by reference, not copied
        this.events = events;
    }

    // Render the event history as the string that is handed to the LLM.
    serializeForLLM(): string {
        // plain JSON for now; swap in any custom serialization (XML, etc.) here
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

// Executes one calculator tool call and appends its numeric outcome to the
// thread as a "tool_response" event. Returns the same thread object it was given.
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // shared epilogue for every operation: log, record, hand the thread back
    const respond = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ type: "tool_response", data: value });
        return thread;
    };

    const { a, b } = nextStep;
    switch (nextStep.intent) {
        case "add":
            return respond(a + b);
        case "subtract":
            return respond(a - b);
        case "multiply":
            return respond(a * b);
        case "divide":
            return respond(a / b);
    }
}

/**
 * Drives the agent: asks the model for the next step, records it on the
 * thread as a "tool_call" event, and either runs the calculator tool or
 * hands control back to the caller.
 *
 * @param thread the conversation so far (mutated in place)
 * @returns the thread once the model answers with "done_for_now" or
 *          "request_more_information"
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({ "type": "tool_call", "data": nextStep });

        // Intents addressed to the human end the loop.
        if (nextStep.intent === "done_for_now" || nextStep.intent === "request_more_information") {
            return thread;
        }

        // Calculator intents are executed and the loop continues.
        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply" ||
            nextStep.intent === "divide"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05-17/walkthrough/05-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";


/**
 * Command-line entry point: treats all arguments as one user message, runs
 * the agent loop, and keeps prompting on stdin while the agent asks for
 * more information. Exits the process when finished.
 */
export async function cli() {
    // argv[0] is node and argv[1] is the script; everything after is the message.
    const words = process.argv.slice(2);
    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed the thread with the user's message.
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let outcome = await agentLoop(thread);
    let lastEvent = outcome.events[outcome.events.length - 1];

    // Each clarification request is answered on stdin and fed back to the agent.
    while (lastEvent.data.intent === "request_more_information") {
        const reply = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: reply });
        outcome = await agentLoop(thread);
        lastEvent = outcome.events[outcome.events.length - 1];
    }

    // Print the agent's final message (you could loop here as well).
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Prints `message` on stdout and resolves with the line the user types.
 *
 * A fresh readline interface is created per question and closed as soon as
 * the answer arrives, so repeated prompts do not stack listeners on stdin.
 *
 * @param message the question to show to the user
 * @returns the user's answer
 */
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Release stdin before handing the answer back.
            rl.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05-17/walkthrough/05b-agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the two classes declared below.
type HumanTools = ClarificationRequest | DoneForNow

// Step with intent "request_more_information"; `message` is a free-form string.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step with intent "done_for_now"; `message` is described to the model as the
// text to send to the user about the work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// LLM client using the "openai-generic" provider. The base URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Asks the model for the next step given the serialized thread.
// The output is one of HumanTools or CalculatorTools (CalculatorTools is not
// declared in this file). Uses the Qwen3 client; a gpt-4o alternative is
// left commented out.
// NOTE(review): "/nothink" in the system message is presumably a model-specific
// switch to suppress reasoning output - confirm against the model's docs.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client Qwen3
    // client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// A bare greeting should produce a "done_for_now" step.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

// A single multiplication request should produce a "multiply" step.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Thread in which all three requested operations have already been called and
// answered; the model should finish with "done_for_now" and mention 18.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// A garbled operand should make the model ask for more information.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the human clarifies ("lets try 12 instead"), the model should emit a
// multiply step with a == 3 and b == 12. Each assert label names the field
// it checks, so a failure report points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"},
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05-17/walkthrough/05c-agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the two classes declared below.
type HumanTools = ClarificationRequest | DoneForNow

// Step with intent "request_more_information"; `message` is a free-form string.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step with intent "done_for_now"; `message` is described to the model as the
// text to send to the user about the work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// LLM client using the "openai-generic" provider. The base URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
} 

// Asks the model for the next step given the serialized thread.
// The output is one of HumanTools or CalculatorTools (CalculatorTools is not
// declared in this file). Uses the Qwen3 client; a gpt-4o alternative is
// left commented out.
// NOTE(review): "/nothink" in the system message is presumably a model-specific
// switch to suppress reasoning output - confirm against the model's docs.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client Qwen3

    // client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// A bare greeting should make the model ask what the user wants
// ("request_more_information").
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single multiplication request should produce a "multiply" step.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Thread in which all three requested operations have already been called and
// answered; the model should finish with "done_for_now" and mention 18.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// A garbled operand should make the model ask for more information.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the human clarifies ("lets try 12 instead"), the model should emit a
// multiply step with a == 3 and b == 12. Each assert label names the field
// it checks, so a failure report points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"},
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05-17/walkthrough/06-agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the two classes declared below.
type HumanTools = ClarificationRequest | DoneForNow

// Step with intent "request_more_information"; `message` is a free-form string.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step with intent "done_for_now"; `message` is described to the model as the
// text to send to the user about the work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// LLM client using the "openai-generic" provider. The base URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Asks the model for the next step given the serialized thread.
// The output is one of HumanTools or CalculatorTools (CalculatorTools is not
// declared in this file). After the output-format section the prompt also
// instructs the model to write a bulleted plan before the schema output.
// NOTE(review): "/nothink" in the system message is presumably a model-specific
// switch to suppress reasoning output - confirm against the model's docs.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client Qwen3

    // client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        First, always plan out what to do next, for example:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting should make the model ask what the user wants
// ("request_more_information").
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single multiplication request should produce a "multiply" step.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Thread in which all three requested operations have already been called and
// answered; the model should finish with "done_for_now" and mention 18.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// A garbled operand should make the model ask for more information.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the human clarifies ("lets try 12 instead"), the model should emit a
// multiply step with a == 3 and b == 12. Each assert label names the field
// it checks, so a failure report points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"},
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05-17/walkthrough/07-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * A single entry in a thread's event log.
 *
 * `type` is a free-form tag (this file pushes "tool_call" and "tool_response");
 * `data` is the untyped payload carried by the event.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered event log for one agent conversation.
 *
 * The array handed to the constructor is stored by reference, so callers
 * that keep it will observe later pushes.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Produces the string form of the thread that is sent to the model.
     * Currently JSON pretty-printed with a 2-space indent; swap in any custom
     * format (XML, etc.) here, e.g.
     * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM() {
        const indentWidth = 2;
        return JSON.stringify(this.events, null, indentWidth);
    }
}

// Union of the four arithmetic tool-call shapes imported from the BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation and
 *                 `a` / `b` are its operands
 * @param thread   the thread to append the "tool_response" event to (mutated)
 * @returns the same thread instance that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Log the value, append it as a tool_response event, hand the thread back.
    const respond = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return respond(nextStep.a + nextStep.b);
        case "subtract":
            return respond(nextStep.a - nextStep.b);
        case "multiply":
            return respond(nextStep.a * nextStep.b);
        case "divide":
            return respond(nextStep.a / nextStep.b);
    }
}

/**
 * Drives the agent: asks the model for the next step, records it on the
 * thread as a "tool_call" event, and either runs the calculator tool or
 * hands control back to the caller.
 *
 * @param thread the conversation so far (mutated in place)
 * @returns the thread once the model answers with "done_for_now" or
 *          "request_more_information"
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({ "type": "tool_call", "data": nextStep });

        // Intents addressed to the human end the loop.
        if (nextStep.intent === "done_for_now" || nextStep.intent === "request_more_information") {
            return thread;
        }

        // Calculator intents are executed and the loop continues.
        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply" ||
            nextStep.intent === "divide"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05-17/walkthrough/07b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * A single entry in a thread's event log.
 *
 * `type` is a free-form tag (this file pushes "tool_call" and "tool_response");
 * `data` is the untyped payload carried by the event.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered event log for one agent conversation, with a tag-delimited
 * text serialization for the model.
 *
 * The array handed to the constructor is stored by reference.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins the pieces with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Removes leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as `<tag> ... </tag>`.
     *
     * The tag is `data.intent` when present, otherwise the event type. Object
     * payloads are listed as `key: value` lines (skipping `intent`); any
     * other payload, including null, is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so null must be excluded before Object.keys.
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }
}

// Union of the four arithmetic tool-call shapes imported from the BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation and
 *                 `a` / `b` are its operands
 * @param thread   the thread to append the "tool_response" event to (mutated)
 * @returns the same thread instance that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Log the value, append it as a tool_response event, hand the thread back.
    const respond = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return respond(nextStep.a + nextStep.b);
        case "subtract":
            return respond(nextStep.a - nextStep.b);
        case "multiply":
            return respond(nextStep.a * nextStep.b);
        case "divide":
            return respond(nextStep.a / nextStep.b);
    }
}

/**
 * Drives the agent: asks the model for the next step, records it on the
 * thread as a "tool_call" event, and either runs the calculator tool or
 * hands control back to the caller.
 *
 * @param thread the conversation so far (mutated in place)
 * @returns the thread once the model answers with "done_for_now" or
 *          "request_more_information"
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({ "type": "tool_call", "data": nextStep });

        // Intents addressed to the human end the loop.
        if (nextStep.intent === "done_for_now" || nextStep.intent === "request_more_information") {
            return thread;
        }

        // Calculator intents are executed and the loop continues.
        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply" ||
            nextStep.intent === "divide"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05-17/walkthrough/07c-agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the two classes declared below.
type HumanTools = ClarificationRequest | DoneForNow

// Step with intent "request_more_information"; `message` is a free-form string.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step with intent "done_for_now"; `message` is described to the model as the
// text to send to the user about the work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// LLM client using the "openai-generic" provider. The base URL and API key
// are read from the BASETEN_BASE_URL and BASETEN_API_KEY environment variables.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

// Asks the model for the next step given the serialized thread.
// The output is one of HumanTools or CalculatorTools (CalculatorTools is not
// declared in this file). After the output-format section the prompt also
// instructs the model to think through a bulleted list before the schema output.
// NOTE(review): "/nothink" in the system message is presumably a model-specific
// switch to suppress reasoning output - confirm against the model's docs.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client Qwen3

    // client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// A bare greeting (tag-delimited thread format) should make the model ask
// what the user wants ("request_more_information").
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single multiplication request should produce a "multiply" step.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Tag-delimited thread in which all three requested operations have already
// been called and answered; the model should finish with "done_for_now" and
// mention 18.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// A garbled operand should make the model ask for more information.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the human clarifies ("lets try 12 instead"), the model should emit a
// multiply step with a == 3 and b == 12. Each assert label names the field
// it checks, so a failure report points at the right operand.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-05-17/walkthrough/08-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';

// Create the Express app, parse JSON request bodies, and pretty-print
// JSON responses with a 2-space indent.
const app = express();
app.use(express.json());
app.set('json spaces', 2);

// POST /thread - Start new thread
// Body: { "message": string }. Responds with the thread produced by the agent
// loop, 400 when the message is missing or empty, and 500 when the agent loop
// throws (a rejection inside an async handler would otherwise leave the
// request without a response).
app.post('/thread', async (req, res) => {
    const message = req.body?.message;
    if (typeof message !== 'string' || message.trim() === '') {
        res.status(400).json({ error: "Request body must include a non-empty string 'message'" });
        return;
    }

    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);
        const result = await agentLoop(thread);
        res.json(result);
    } catch (err) {
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});

// GET /thread/:id - Get thread status
// Placeholder: no thread state is kept in this version, so every request
// is answered with 404 and a "Not implemented yet" error body.
app.get('/thread/:id', (req, res) => {
    // optional - add state
    res.status(404).json({ error: "Not implemented yet" });
});

// Listen on PORT from the environment, falling back to 3000.
// Note: the server starts as a side effect of importing this module.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Export the configured app.
export { app };

================================================
FILE: workshops/2025-05-17/walkthrough/09-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';

// Create the Express app, parse JSON request bodies, and pretty-print
// JSON responses with a 2-space indent.
const app = express();
app.use(express.json());
app.set('json spaces', 2);

// Single ThreadStore instance shared by all route handlers below.
const store = new ThreadStore();

// POST /thread - Start new thread
// Body: { "message": string }. Stores the new thread, runs the agent loop and
// responds with the thread plus its id. Responds 400 when the message is
// missing or empty and 500 when the agent loop throws (a rejection inside an
// async handler would otherwise leave the request without a response).
app.post('/thread', async (req, res) => {
    const message = req.body?.message;
    if (typeof message !== 'string' || message.trim() === '') {
        res.status(400).json({ error: "Request body must include a non-empty string 'message'" });
        return;
    }

    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);

        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    res.status(404).json({ error: "Thread not found" });
});

// POST /thread/:id/response - Handle clarification response
// Body: { "message": string }. Appends the human's reply to the stored thread,
// resumes the agent loop and responds with the updated thread. Responds 404
// for an unknown id, 400 when the message is missing or empty, and 500 when
// the agent loop throws.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    const message = req.body?.message;
    if (typeof message !== 'string' || message.trim() === '') {
        res.status(400).json({ error: "Request body must include a non-empty string 'message'" });
        return;
    }

    try {
        thread.events.push({
            type: "human_response",
            data: message
        });

        // loop until stop event
        const newThread = await agentLoop(thread);

        store.update(req.params.id, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(newThread);
    } catch (err) {
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});

// Listen on PORT from the environment, falling back to 3000.
// Note: the server starts as a side effect of importing this module.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Export the configured app.
export { app };

================================================
FILE: workshops/2025-05-17/walkthrough/09-state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';


/**
 * In-memory registry of threads keyed by a random UUID.
 *
 * You can replace this with any simple state management,
 * e.g. redis, sqlite, postgres, etc.
 */
export class ThreadStore {
    private threads: Map<string, Thread> = new Map();

    /** Stores `thread` under a freshly generated UUID and returns that id. */
    create(thread: Thread): string {
        const threadId = crypto.randomUUID();
        this.threads.set(threadId, thread);
        return threadId;
    }

    /** Looks up a thread by id; yields undefined when the id is unknown. */
    get(id: string): Thread | undefined {
        const found = this.threads.get(id);
        return found;
    }

    /** Stores `thread` under `id`, replacing any previous entry. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}

================================================
FILE: workshops/2025-05-17/walkthrough/10-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/**
 * A single entry in a thread's event log.
 *
 * `type` is a free-form tag (this file pushes "tool_call" and "tool_response");
 * `data` is the untyped payload carried by the event.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered event log for one agent conversation, with a tag-delimited
 * text serialization for the model and helpers that report what kind of
 * human input the thread is waiting on.
 *
 * The array handed to the constructor is stored by reference.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins the pieces with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Removes leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as `<tag> ... </tag>`.
     *
     * The tag is `data.intent` when present, otherwise the event type. Object
     * payloads are listed as `key: value` lines (skipping `intent`); any
     * other payload, including null, is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so null must be excluded before Object.keys.
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /**
     * True when the last event's intent is "request_more_information" or
     * "done_for_now". False for an empty thread or a payload without an intent.
     */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        // Optional chaining: the thread may be empty and data may be null.
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /**
     * True when the last event's intent is "divide". False for an empty
     * thread or a payload without an intent.
     */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}

// Union of the four arithmetic tool-call shapes imported from the BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation and
 *                 `a` / `b` are its operands
 * @param thread   the thread to append the "tool_response" event to (mutated)
 * @returns the same thread instance that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Log the value, append it as a tool_response event, hand the thread back.
    const respond = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return respond(nextStep.a + nextStep.b);
        case "subtract":
            return respond(nextStep.a - nextStep.b);
        case "multiply":
            return respond(nextStep.a * nextStep.b);
        case "divide":
            return respond(nextStep.a / nextStep.b);
    }
}

/**
 * Drives the agent: asks the model for the next step, records it on the
 * thread as a "tool_call" event, and either runs the calculator tool or
 * hands control back to the caller.
 *
 * @param thread the conversation so far (mutated in place)
 * @returns the thread once the model answers with "done_for_now",
 *          "request_more_information", or a "divide" call (which is returned
 *          unexecuted so a human can approve it)
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({ "type": "tool_call", "data": nextStep });

        // Intents addressed to the human end the loop.
        if (nextStep.intent === "done_for_now" || nextStep.intent === "request_more_information") {
            return thread;
        }

        // divide is scary, return it for human approval
        if (nextStep.intent === "divide") {
            return thread;
        }

        // The remaining calculator intents are executed and the loop continues.
        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05-17/walkthrough/10-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';

// Create the Express app, parse JSON request bodies, and pretty-print
// JSON responses with a 2-space indent.
const app = express();
app.use(express.json());
app.set('json spaces', 2);

// Single ThreadStore instance shared by all route handlers below.
const store = new ThreadStore();

// POST /thread - Start new thread
// Body: { "message": string }. Stores the new thread, runs the agent loop and
// responds with the thread plus its id. Responds 400 when the message is
// missing or empty and 500 when the agent loop throws (a rejection inside an
// async handler would otherwise leave the request without a response).
app.post('/thread', async (req, res) => {
    const message = req.body?.message;
    if (typeof message !== 'string' || message.trim() === '') {
        res.status(400).json({ error: "Request body must include a non-empty string 'message'" });
        return;
    }

    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);

        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    res.status(404).json({ error: "Thread not found" });
});


// Body sent to approve or deny a pending tool call; `comment` is optional.
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

// Body sent to answer the agent with free text.
type ResponsePayload = {
    type: "response";
    response: string;
}

// Request body accepted by POST /thread/:id/response, discriminated on `type`.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Accepts a Payload body and applies it to the stored thread:
//   - "response" while the thread awaits a human response: appended as human_response
//   - "approval" with approved=false while awaiting approval: the denial (and
//     the optional comment) is appended as a tool_response
//   - "approval" with approved=true while awaiting approval: the pending tool is run
// Anything else yields 400. The agent loop is then resumed and the updated
// thread returned. Responds 404 for an unknown id and 500 when the tool or
// the agent loop throws.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    // A missing body falls through to the 400 branch instead of crashing on body.type.
    const body: Payload = req.body ?? {};

    let lastEvent = thread.events[thread.events.length - 1];

    try {
        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; comment is optional, so only quote it when present
            const feedback = body.comment ? ` with feedback: "${body.comment}"` : "";
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation${feedback}`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const newThread = await agentLoop(thread);

        store.update(req.params.id, newThread);

        lastEvent = newThread.events[newThread.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(newThread);
    } catch (err) {
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});

// Listen on the port named by the PORT environment variable, or 3000 when it
// is unset/empty, and log once the server is accepting connections.
const port = process.env.PORT || 3000;
app.listen(port, function onListening() {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05-17/walkthrough/11-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point for the agent.
 *
 * Joins all command-line arguments into one message, starts a thread with it,
 * and alternates between running the agent loop and asking a human (via
 * `askHuman`) until the agent's last event has intent `done_for_now`. The
 * final message is printed and the process exits with code 0. Exits with
 * code 1 when no arguments are given.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event and run
    // the agent loop on it. From here on, always work with the thread that
    // agentLoop returned.
    let thread = await agentLoop(new Thread([{ type: "user_input", data: message }]));
    let lastEvent = thread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        // Append the human's reply to the *latest* thread. Pushing onto the
        // originally constructed thread is only correct if agentLoop returns
        // the very same object; using its return value is correct either way.
        thread.events.push(responseEvent);
        thread = await agentLoop(thread);
        lastEvent = thread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Route a question to a human: over email when HUMANLAYER_API_KEY is set in
 * the environment, otherwise through an interactive terminal prompt showing
 * the event's `data.message`.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    const emailConfigured = Boolean(process.env.HUMANLAYER_API_KEY);
    return emailConfigured
        ? await askHumanEmail(lastEvent)
        : await askHumanCLI(lastEvent.data.message);
}

/**
 * Prompt on the terminal with `message` and wait for one line of input.
 *
 * @param message text shown above the `> ` prompt
 * @returns a `human_response` event whose data is the line the user typed
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Close the interface once we have the answer. This function is
            // called once per question, and leaving each interface open stacks
            // another set of listeners on stdin for every prompt.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Request approval over email (via HumanLayer) for the tool call in `lastEvent`.
 *
 * Only the `divide` intent is handled: when approved, the division is performed
 * here and returned as a `tool_response` event; when denied, a `tool_response`
 * carrying the reviewer's comment is returned instead.
 *
 * @throws Error when HUMANLAYER_EMAIL is unset, or for any intent other than `divide`
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    const address = process.env.HUMANLAYER_EMAIL;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    // apiKey is read from the environment by the SDK
    const hl = humanlayer({
        runId: "12fa-cli-agent", // name of this agent
        verbose: true,
        // agent should request permission via email
        contactChannel: { email: { address } }
    });

    const intent = lastEvent.data.intent;
    if (intent !== "divide") {
        throw new Error(`unknown tool: ${intent}`);
    }

    // fetch approval synchronously - this will block until reply
    const approval = await hl.fetchHumanApproval({
        spec: {
            fn: "divide",
            kwargs: { a: lastEvent.data.a, b: lastEvent.data.b }
        }
    });

    if (!approval.approved) {
        // The line break and indentation inside this message are part of the
        // text that has always been emitted; keep them as-is.
        return {
            type: "tool_response",
            data: `user denied operation ${intent}\n                with feedback: ${approval.comment}`
        };
    }

    const quotient = lastEvent.data.a / lastEvent.data.b;
    console.log("tool_response", quotient);
    return { type: "tool_response", data: quotient };
}

================================================
FILE: workshops/2025-05-17/walkthrough/11b-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point for the agent.
 *
 * Builds a thread from the command-line arguments, then alternates between
 * the agent loop and `askHuman` until the agent's last event has intent
 * `done_for_now`; prints that event's message and exits 0. Exits 1 when no
 * arguments are supplied.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Start the thread with the user's message and run the agent loop; keep
    // working with whatever thread agentLoop hands back.
    let thread = await agentLoop(new Thread([{ type: "user_input", data: message }]));
    let lastEvent = thread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        // Push onto the thread returned by agentLoop rather than the one we
        // constructed: identical when agentLoop mutates in place, and still
        // correct if it ever returns a new Thread.
        thread.events.push(responseEvent);
        thread = await agentLoop(thread);
        lastEvent = thread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Pick the human-contact channel: email when HUMANLAYER_API_KEY is present in
 * the environment, otherwise a terminal prompt with the event's message.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_API_KEY) {
        return await askHumanCLI(lastEvent.data.message);
    }
    return await askHumanEmail(lastEvent);
}

/**
 * Show `message` on the terminal followed by a `> ` prompt and wait for a line.
 *
 * @param message the question to display
 * @returns a `human_response` event containing the typed answer
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // One interface is created per question; close it so interfaces
            // (and their stdin listeners) do not accumulate across prompts.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contact a human over email (via HumanLayer) about the agent's last event.
 *
 * - `request_more_information`: sends the agent's message and returns the
 *   reply as a `human_response` event - the same event type `askHumanCLI`
 *   produces for a human's answer, so the thread looks the same regardless of
 *   which channel was used.
 * - `divide`: asks for approval. When approved the division is performed here
 *   and returned as a `tool_response`; when denied a `tool_response` carrying
 *   the reviewer's comment is returned.
 *
 * Both calls block until the human replies.
 *
 * @throws Error when HUMANLAYER_EMAIL is unset, or the intent is not one of the above
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // This is a human's answer, not the output of a tool.
        return {
            "type": "human_response",
            "data": response
        }
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            // The line break and indentation are part of the existing message text.
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}\n                with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-05-17/walkthrough/11c-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point for the agent.
 *
 * Turns the command-line arguments into the first `user_input` event, then
 * keeps running the agent loop and consulting a human through `askHuman` until
 * the last event's intent is `done_for_now`. Prints the final message and
 * exits 0; exits 1 if no arguments were passed.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Run the agent loop on a fresh thread and track the thread it returns.
    let thread = await agentLoop(new Thread([{ type: "user_input", data: message }]));
    let lastEvent = thread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        // Append to the thread agentLoop returned, not the initially
        // constructed one, so the reply cannot land on a stale object.
        thread.events.push(responseEvent);
        thread = await agentLoop(thread);
        lastEvent = thread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Ask a human about `lastEvent`. With HUMANLAYER_API_KEY set the request goes
 * out by email; without it the event's message is shown as a terminal prompt.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    const viaEmail = !!process.env.HUMANLAYER_API_KEY;
    if (viaEmail) {
        return await askHumanEmail(lastEvent);
    }
    return await askHumanCLI(lastEvent.data.message);
}

/**
 * Ask a question on the terminal and resolve with the user's typed line.
 *
 * @param message text printed before the `> ` prompt
 * @returns a `human_response` event whose data is the answer
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Release the interface after each answer; a new one is created
            // on every call, so leaving them open leaks listeners on stdin.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contact a human over email (via HumanLayer) about the agent's last event,
 * using a custom Jinja email body.
 *
 * - `request_more_information`: sends the agent's message and returns the
 *   reply as a `human_response` event - the same event type `askHumanCLI`
 *   produces for a human's answer.
 * - `divide`: asks for approval. When approved the division is performed here
 *   and returned as a `tool_response`; when denied a `tool_response` carrying
 *   the reviewer's comment is returned.
 *
 * Both calls block until the human replies.
 *
 * @throws Error when HUMANLAYER_EMAIL is unset, or the intent is not one of the above
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
<br><br>
reply to this email to approve
{% endif %}`
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // This is a human's answer, not the output of a tool.
        return {
            "type": "human_response",
            "data": response
        }
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            // The line break and indentation are part of the existing message text.
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}\n                with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-05-17/walkthrough/12-1-server-init.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// Build a HumanLayer client that contacts humans by email.
// Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is missing from the
// environment (checked in that order). The API key is only checked for
// presence here; it is not passed to humanlayer() explicitly.
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL, HUMANLAYER_API_KEY } = process.env;

    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address: HUMANLAYER_EMAIL } };
    return humanlayer({ runId: "12fa-agent", contactChannel });
}

// POST /thread - Start new thread
// Seeds a thread with the request's `message`, runs the agent loop to its
// first stopping point, stores the result and returns it with its id.
app.post('/thread', async (req, res) => {
    const firstEvent = { type: "user_input", data: req.body.message };
    const thread = new Thread([firstEvent]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);
    store.update(threadId, newThread);

    // The loop has stopped: attach the URL the client should post to in order
    // to push a new message onto the thread.
    const finalEvent = newThread.events[newThread.events.length - 1];
    finalEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", finalEvent);

    res.json({ thread_id: threadId, ...newThread });
});

// GET /thread/:id - Get thread status
// Returns the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
    } else {
        res.status(404).json({ error: "Thread not found" });
    }
});


/** Body that approves or denies the tool call a thread is waiting on. */
interface ApprovalPayload {
    type: "approval";
    approved: boolean;
    /** Optional feedback text; only read on denial. */
    comment?: string;
}

/** Body that carries a human's free-text answer. */
interface ResponsePayload {
    type: "response";
    response: string;
}

/** Union of the bodies POST /thread/:id/response accepts, keyed by `type`. */
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Resume a paused thread with a human's reply.
//
// Accepts either a `response` payload (an answer for a thread that is awaiting
// a human response) or an `approval` payload (approve/deny for a thread that is
// awaiting approval), applies it to the stored thread, re-runs the agent loop
// and returns the updated thread.
//
// Responds 404 for an unknown thread, 400 when the payload does not match what
// the thread is waiting for, and 500 if resuming the agent throws.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    // A missing/unparsed body would make `body.type` throw; treat it as an
    // empty payload so it is rejected by the 400 branch below instead.
    const body: Payload = req.body || {};

    try {
        // Capture the pending event before anything new is pushed onto the thread.
        const pendingEvent = thread.events[thread.events.length - 1];

        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; `comment` is optional, so only
            // mention feedback when there is some (avoids the text "undefined")
            const feedback = body.comment ? ` with feedback: "${body.comment}"` : "";
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation${feedback}`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(pendingEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const result = await agentLoop(thread);

        store.update(req.params.id, result);

        const lastEvent = result.events[result.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(result);
    } catch (err) {
        // A rejected promise from an async handler is not guaranteed to reach
        // Express's error handling, which would leave the request hanging.
        console.error("failed to process response for thread", req.params.id, err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Internal server error" });
        }
    }
});

// Serve on PORT from the environment, defaulting to 3000 when it is not set.
const port = process.env.PORT || 3000;
app.listen(port, function announce() {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05-17/walkthrough/12-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();


// Create the HumanLayer client used to email humans.
// Requires HUMANLAYER_EMAIL and HUMANLAYER_API_KEY in the environment and
// throws (checking the email first) when either is missing. The API key is
// validated for presence only; it is not handed to humanlayer() here.
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL, HUMANLAYER_API_KEY } = process.env;

    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address: HUMANLAYER_EMAIL } };
    return humanlayer({ runId: "12fa-agent", contactChannel });
}
// POST /thread - Start new thread
//
// Registers a thread seeded with the request's `message`, kicks off the agent
// loop in the background and returns immediately with the thread id so the
// caller can poll GET /thread/:id. If the agent stops awaiting a human
// response, a HumanLayer contact is created carrying the thread id in its state.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Create the store entry up front so the id can be returned to the client;
    // without it the caller has no way to look the thread up later.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - does not wait for the human's reply.
            // Awaited so that a failed API call is reported by the catch below.
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // Nothing awaits this background work, so failures must be handled
        // here or they become unhandled promise rejections.
        console.error(`background agent run failed for thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or a 404 error body if none has that id.
app.get('/thread/:id', (req: Request, res: Response) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
    } else {
        res.status(404).json({ error: "Thread not found" });
    }
});

// Type the /webhook handler casts incoming request bodies to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// Placeholder handler: the body is intentionally empty and nothing in this
// file calls it.
const handleHumanResponse = async (req: Request, res: Response): Promise<void> => {};

// POST /webhook - Receive a completed human contact.
//
// Checks that the payload names a known thread that is currently awaiting a
// human response. Responds 400 when the thread id is missing or the thread is
// not awaiting a response, 404 when the thread is unknown, and otherwise
// acknowledges the delivery.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    // (extracted here, but this handler does not apply it to the thread yet)
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // Always complete the request: without a reply on the success path the
    // sender's HTTP call would hang until it times out.
    res.json({ status: "ok" });
});

// Bind to the PORT environment variable when set, otherwise to 3000.
const port = process.env.PORT || 3000;
app.listen(port, function onReady() {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05-17/walkthrough/12a-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// Return a HumanLayer client configured to reach humans by email.
// Throws if HUMANLAYER_EMAIL, then HUMANLAYER_API_KEY, is absent from the
// environment. The API key is checked for presence but not passed along
// explicitly.
const getHumanlayer = () => {
    const email = process.env.HUMANLAYER_EMAIL;
    if (!email) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    const apiKey = process.env.HUMANLAYER_API_KEY;
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    return humanlayer({
        runId: "12fa-agent",
        contactChannel: { email: { address: email } }
    });
}
// POST /thread - Start new thread
//
// Registers a thread seeded with the request's `message`, kicks off the agent
// loop in the background and returns immediately with the thread id so the
// caller can poll GET /thread/:id. If the agent stops awaiting a human
// response, a HumanLayer contact is created carrying the thread id in its state.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Create the store entry up front so the id can be returned to the client;
    // without it the caller has no way to look the thread up later.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - does not wait for the human's reply.
            // Awaited so that a failed API call is reported by the catch below.
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // Nothing awaits this background work, so failures must be handled
        // here or they become unhandled promise rejections.
        console.error(`background agent run failed for thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Looks the thread up in the store; 404 if absent, otherwise the thread as JSON.
app.get('/thread/:id', (req: Request, res: Response) => {
    const match = store.get(req.params.id);
    if (match) {
        res.json(match);
    } else {
        res.status(404).json({ error: "Thread not found" });
    }
});

// Type the /webhook handler casts incoming request bodies to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// Placeholder handler: the body is intentionally empty and nothing in this
// file calls it.
const handleHumanResponse = async (req: Request, res: Response): Promise<void> => {};

// POST /webhook - Receive a completed human contact.
//
// Checks that the payload names a known thread that is currently awaiting a
// human response. Responds 400 when the thread id is missing or the thread is
// not awaiting a response, 404 when the thread is unknown, and otherwise
// acknowledges the delivery.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    // (extracted here, but this handler does not apply it to the thread yet)
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // Always complete the request: without a reply on the success path the
    // sender's HTTP call would hang until it times out.
    res.json({ status: "ok" });
});

// Use PORT from the environment if provided; fall back to 3000.
const port = process.env.PORT || 3000;
app.listen(port, function onListening() {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05-17/walkthrough/12aa-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();


// Construct the email-based HumanLayer client.
// Both HUMANLAYER_EMAIL and HUMANLAYER_API_KEY must be set in the environment;
// the email is checked first and a missing value throws. The API key is only
// checked for presence, not passed to humanlayer() explicitly.
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL, HUMANLAYER_API_KEY } = process.env;

    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    return humanlayer({
        runId: "12fa-agent",
        contactChannel: {
            email: { address: HUMANLAYER_EMAIL }
        }
    });
}
// POST /thread - Start new thread
//
// Registers a thread seeded with the request's `message`, kicks off the agent
// loop in the background and returns immediately with the thread id so the
// caller can poll GET /thread/:id. If the agent stops awaiting a human
// response, a HumanLayer contact is created carrying the thread id in its state.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Create the store entry up front so the id can be returned to the client;
    // without it the caller has no way to look the thread up later.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - does not wait for the human's reply.
            // Awaited so that a failed API call is reported by the catch below.
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // Nothing awaits this background work, so failures must be handled
        // here or they become unhandled promise rejections.
        console.error(`background agent run failed for thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Sends the stored thread back as JSON; unknown ids get a 404 error body.
app.get('/thread/:id', (req: Request, res: Response) => {
    const stored = store.get(req.params.id);
    if (stored) {
        res.json(stored);
    } else {
        res.status(404).json({ error: "Thread not found" });
    }
});

// Type the /webhook handler casts incoming request bodies to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// Placeholder handler: the body is intentionally empty and nothing in this
// file calls it.
const handleHumanResponse = async (req: Request, res: Response): Promise<void> => {};

// POST /webhook - Receive a completed human contact.
//
// Checks that the payload names a known thread that is currently awaiting a
// human response. Responds 400 when the thread id is missing or the thread is
// not awaiting a response, 404 when the thread is unknown, and otherwise
// acknowledges the delivery.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    // (extracted here, but this handler does not apply it to the thread yet)
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // Always complete the request: without a reply on the success path the
    // sender's HTTP call would hang until it times out.
    res.json({ status: "ok" });
});

// The server port: PORT from the environment, or 3000 by default.
const port = process.env.PORT || 3000;
app.listen(port, function started() {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05-17/walkthrough/12b-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { V1Beta2EmailEventReceived, V1Beta2FunctionCallCompleted, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// POST /thread - Start new thread
// Builds a thread from the request's `message`, runs the agent loop until it
// stops, saves the result and returns it together with the thread id.
app.post('/thread', async (req, res) => {
    const seedEvent = { type: "user_input", data: req.body.message };
    const thread = new Thread([seedEvent]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);
    store.update(threadId, newThread);

    // We have exited the loop, so tell the client where to post in order to
    // push a new message onto the thread.
    const tailEvent = newThread.events[newThread.events.length - 1];
    tailEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", tailEvent);

    res.json({ thread_id: threadId, ...newThread });
});

// GET /thread/:id - Get thread status
// Returns the thread stored under the given id, or 404 when there is none.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
    } else {
        res.status(404).json({ error: "Thread not found" });
    }
});


/** Approve/deny body for a thread that is waiting on approval of a tool call. */
interface ApprovalPayload {
    type: "approval";
    approved: boolean;
    /** Optional reviewer feedback; only read when `approved` is false. */
    comment?: string;
}

/** Free-text answer body for a thread that is waiting on a human response. */
interface ResponsePayload {
    type: "response";
    response: string;
}

/** Everything POST /thread/:id/response accepts; `type` selects the variant. */
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Resume a paused thread with a human's reply.
//
// Accepts either a `response` payload (an answer for a thread that is awaiting
// a human response) or an `approval` payload (approve/deny for a thread that is
// awaiting approval), applies it to the stored thread, re-runs the agent loop
// and returns the updated thread.
//
// Responds 404 for an unknown thread, 400 when the payload does not match what
// the thread is waiting for, and 500 if resuming the agent throws.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    // A missing/unparsed body would make `body.type` throw; treat it as an
    // empty payload so it is rejected by the 400 branch below instead.
    const body: Payload = req.body || {};

    try {
        // Capture the pending event before anything new is pushed onto the thread.
        const pendingEvent = thread.events[thread.events.length - 1];

        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; `comment` is optional, so only
            // mention feedback when there is some (avoids the text "undefined")
            const feedback = body.comment ? ` with feedback: "${body.comment}"` : "";
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation${feedback}`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(pendingEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const result = await agentLoop(thread);

        store.update(req.params.id, result);

        const lastEvent = result.events[result.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(result);
    } catch (err) {
        // A rejected promise from an async handler is not guaranteed to reach
        // Express's error handling, which would leave the request hanging.
        console.error("failed to process response for thread", req.params.id, err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Internal server error" });
        }
    }
});

// Type the webhook handler casts incoming request bodies to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// POST /webhook/response - Receive a completed human contact and resume the thread.
//
// Looks up the thread named in the payload's state, appends the human's reply
// as a `human_response` event, re-runs the agent loop, stores the result and
// acknowledges the delivery. Responds 400 when the thread id is missing or the
// thread is not awaiting a response, 404 when the thread is unknown, and 500
// if resuming the agent throws.
app.post('/webhook/response', async (req, res) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    thread.events.push({
        type: "human_response",
        data: humanResponse
    });

    try {
        // The reply is only useful if the agent gets to act on it: continue
        // the loop and persist wherever it stops next.
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        // Always complete the request; without a reply the sender's HTTP call
        // would hang until it times out.
        res.json({ status: "ok" });
    } catch (err) {
        console.error("failed to resume thread from webhook", threadId, err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Internal server error" });
        }
    }
});

// Start listening on PORT (from the environment) or on 3000 if it is unset.
const port = process.env.PORT || 3000;
app.listen(port, function onListening() {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05-17/walkthrough.md
================================================
# Building the 12-factor agent template from scratch

Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology.

## Cleanup

Make sure you're starting from a clean slate

Clean up existing files

    rm -rf baml_src/ && rm -rf src/

## Chapter 0 - Hello World

Let's start with a basic TypeScript setup and a hello world program.

This guide is written in TypeScript (yes, a Python version is coming soon).

There are many checkpoints between each file edit in the workshop steps,
so even if you aren't super familiar with TypeScript,
you should be able to keep up and run each example.

To run this guide, you'll need a relatively recent version of nodejs and npm installed

You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine


    brew install node@20

You should see the node version

    node --version

Copy initial package.json

    cp ./walkthrough/00-package.json package.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-package.json
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
```

</details>

Install dependencies

    npm install

Copy tsconfig.json

    cp ./walkthrough/00-tsconfig.json tsconfig.json

<details>
<summary>show file</summary>

```json
// ./walkthrough/00-tsconfig.json
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
```

</details>

add .gitignore

    cp ./walkthrough/00-.gitignore .gitignore

<details>
<summary>show file</summary>

```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```

</details>

Create src folder

    mkdir -p src

Add a simple hello world index.ts

    cp ./walkthrough/00-index.ts src/index.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/00-index.ts
async function hello(): Promise<void> {
    console.log('hello, world!')
}

async function main() {
    await hello()
}

main().catch(console.error)
```

</details>

Run it to verify

    npx tsx src/index.ts

You should see:

    hello, world!

## Chapter 1 - CLI and Agent Loop

Now let's add BAML and create our first agent with a CLI interface.

First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.


    npm install @boundaryml/baml

Initialize BAML

    npx baml-cli init

Remove default resume.baml

    rm baml_src/resume.baml

Add our starter agent, a single baml prompt that we'll build on

    cp ./walkthrough/01-agent.baml baml_src/agent.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/01-agent.baml
class DoneForNow {
  intent "done_for_now"
  message string 
}

client<llm> Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY 
  }
}

function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    client Qwen3
    // client "openai/gpt-4o"

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    prompt #"
        {{ _.role("system") }}

        /nothink 

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}
```

</details>

Generate BAML client code

    npx baml-cli generate

Enable BAML logging for this section

    export BAML_LOG=debug

Add the CLI interface

    cp ./walkthrough/01-cli.ts src/cli.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}
```

</details>

Update index.ts to use the CLI

```diff
src/index.ts
+import { cli } from "./cli"
+
 async function hello(): Promise<void> {
     console.log('hello, world!')
 
 async function main() {
-    await hello()
+    await cli()
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/01-index.ts src/index.ts

</details>

Add the agent implementation

    cp ./walkthrough/01-agent.ts src/agent.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
    type: string
    data: any;
}

export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        return JSON.stringify(this.events);
    }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    return nextStep;
}
```

</details>

The BAML code is configured to use BASETEN_API_KEY and BASETEN_BASE_URL by default

To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co),
and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/).

```rust 
  function DetermineNextStep(thread: string) -> DoneForNow {
      client Qwen3
      // ...
```

If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid Baseten key and the BASETEN_BASE_URL env var to the URL of your deployed model.

If you want to try swapping out the model, you can change the `client` line.

[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)

For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) 
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.

For example, to use openai with an OPENAI_API_KEY, you can do:

    client "openai/gpt-4o"


Set your env vars

    export BASETEN_API_KEY=...
    export BASETEN_BASE_URL=...

Try it out

    npx tsx src/index.ts hello

you should see a familiar response from the model

    {
      intent: 'done_for_now',
      message: 'Hello! How can I assist you today?'
    }

## Chapter 2 - Add Calculator Tools

Let's add some calculator tools to our agent.

Let's start by adding a tool definition for the calculator

These are simple structured outputs that we'll ask the model to 
return as a "next step" in the agentic loop.


    cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml

<details>
<summary>show file</summary>

```rust
// ./walkthrough/02-tool_calculator.baml
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


class AddTool {
    intent "add"
    a int | float
    b int | float
}

class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

class DivideTool {
    intent "divide"
    a int | float
    b int | float
}
```

</details>

Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps


```diff
baml_src/agent.baml
 function DetermineNextStep(
     thread: string 
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
     client Qwen3
+
     // client "openai/gpt-4o"
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/02-agent.baml baml_src/agent.baml

</details>

Generate updated BAML client

    npx baml-cli generate

Try out the calculator

    npx tsx src/index.ts 'can you add 3 and 4'

You should see a tool call to the calculator

    {
      intent: 'add',
      a: 3,
      b: 4
    }

## Chapter 3 - Process Tool Calls in a Loop

Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.

First, let's update the agent to handle the tool call


```diff
src/agent.ts
 }
 
-// right now this just runs one turn with the LLM, but
-// we'll update this function to handle all the agent logic
-export async function agentLoop(thread: Thread): Promise<AgentResponse> {
-    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
-    return nextStep;
+
+
+export async function agentLoop(thread: Thread): Promise<string> {
+
+    while (true) {
+        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
+        console.log("nextStep", nextStep);
+
+        switch (nextStep.intent) {
+            case "done_for_now":
+                // response to human, return the next step object
+                return nextStep.message;
+            case "add":
+                thread.events.push({
+                    "type": "tool_call",
+                    "data": nextStep
+                });
+                const result = nextStep.a + nextStep.b;
+                console.log("tool_response", result);
+                thread.events.push({
+                    "type": "tool_response",
+                    "data": result
+                });
+                continue;
+            default:
+                throw new Error(`Unknown intent: ${nextStep.intent}`);
+        }
+    }
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03-agent.ts src/agent.ts

</details>

Now, let's try it out


    npx tsx src/index.ts 'can you add 3 and 4'

you should see the agent call the tool and then return the result

    {
      intent: 'done_for_now',
      message: 'The sum of 3 and 4 is 7.'
    }

For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output

    export BAML_LOG=off

Try a multi-step calculation

    npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'

you'll notice that tools like multiply and divide are not available

    npx tsx src/index.ts 'can you multiply 3 and 4'

next, let's add handlers for the rest of the calculator tools


```diff
src/agent.ts
-import { b } from "../baml_client";
+import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
 
-// tool call or a respond to human tool
-type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
-
 export interface Event {
     type: string
 }
 
+export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
 
+export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
+    let result: number;
+    switch (nextStep.intent) {
+        case "add":
+            result = nextStep.a + nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "subtract":
+            result = nextStep.a - nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "multiply":
+            result = nextStep.a * nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+        case "divide":
+            result = nextStep.a / nextStep.b;
+            console.log("tool_response", result);
+            thread.events.push({
+                "type": "tool_response",
+                "data": result
+            });
+            return thread;
+    }
+}
 
 export async function agentLoop(thread: Thread): Promise<string> {
         console.log("nextStep", nextStep);
 
+        thread.events.push({
+            "type": "tool_call",
+            "data": nextStep
+        });
+
         switch (nextStep.intent) {
             case "done_for_now":
                 return nextStep.message;
             case "add":
-                thread.events.push({
-                    "type": "tool_call",
-                    "data": nextStep
-                });
-                const result = nextStep.a + nextStep.b;
-                console.log("tool_response", result);
-                thread.events.push({
-                    "type": "tool_response",
-                    "data": result
-                });
-                continue;
-            default:
-                throw new Error(`Unknown intent: ${nextStep.intent}`);
+            case "subtract":
+            case "multiply":
+            case "divide":
+                thread = await handleNextStep(nextStep, thread);
         }
     }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/03b-agent.ts src/agent.ts

</details>

Test subtraction

    npx tsx src/index.ts 'can you subtract 3 from 4'

now, let's test the multiplication tool


    npx tsx src/index.ts 'can you multiply 3 and 4'

finally, let's test a more complex calculation with multiple operations


    npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

congratulations, you've taken your first step into hand-rolling an agent loop.

from here, we're going to start incorporating some more intermediate and advanced
concepts for 12-factor agents.


## Chapter 4 - Add Tests to agent.baml

Let's add some tests to our BAML agent.

to start, leave the baml logs enabled

    export BAML_LOG=debug

next, let's add some tests to the agent

We'll start with a simple test that checks the agent's ability to handle
a basic calculation.


```diff
baml_src/agent.baml
 ) -> CalculatorTools | DoneForNow {
     client Qwen3
-
     // client "openai/gpt-4o"
 
-    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
     prompt #"
         {{ _.role("system") }}
 
 
         You are a helpful assistant that can help with tasks.
     "#
   }
+
+test MathOperation {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+      {
+        "type": "user_input",
+        "data": "can you multiply 3 and 4?"
+      }
+    "#
+  }
+}
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/04-agent.baml baml_src/agent.baml

</details>

Run the tests

    npx baml-cli test

now, let's improve the test with assertions!

Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.


```diff
baml_src/agent.baml
 ) -> CalculatorTools | DoneForNow {
     client Qwen3
 
     prompt #"
     "#
   }
+  @@assert(hello, {{this.intent == "done_for_now"}})
 }
 
     "#
   }
+  @@assert(math_operation, {{this.intent == "multiply"}})
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/04b-agent.baml baml_src/agent.baml

</details>

Run the tests

    npx baml-cli test

as you add more tests, you can disable the logs to keep the output clean.
You may want to turn them on as you iterate on specific tests.


    export BAML_LOG=off

now, let's add some more complex test cases,
where we resume from the middle of an in-progress
agentic context window


```diff
baml_src/agent.baml
   }
 }
-
 function DetermineNextStep(
     thread: string 
 ) -> CalculatorTools | DoneForNow {
     client Qwen3
+
     prompt #"
         {{ _.role("system") }}
     "#
   }
-  @@assert(hello, {{this.intent == "done_for_now"}})
+  @@assert(intent, {{this.intent == "done_for_now"}})
 }
 
     "#
   }
-  @@assert(math_operation, {{this.intent == "multiply"}})
+  @@assert(intent, {{this.intent == "multiply"}})
 }
 
+test LongMath {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+      [
+        {
+          "type": "user_input",
+          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
+        },
+        {
+          "type": "tool_call",
+          "data": {
+            "intent": "multiply",
+            "a": 3,
+            "b": 4
+          }
+        },
+        {
+          "type": "tool_response",
+          "data": 12
+        },
+        {
+          "type": "tool_call", 
+          "data": {
+            "intent": "divide",
+            "a": 12,
+            "b": 2
+          }
+        },
+        {
+          "type": "tool_response",
+          "data": 6
+        },
+        {
+          "type": "tool_call",
+          "data": {
+            "intent": "add", 
+            "a": 6,
+            "b": 12
+          }
+        },
+        {
+          "type": "tool_response",
+          "data": 18
+        }
+      ]
+    "#
+  }
+  @@assert(intent, {{this.intent == "done_for_now"}})
+  @@assert(answer, {{"18" in this.message}})
+}
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/04c-agent.baml baml_src/agent.baml

</details>

let's try to run it


    npx baml-cli test

## Chapter 5 - Multiple Human Tools

In this section, we'll add support for multiple tools that serve to
contact humans.


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

first, let's add a tool that can request clarification from a human

this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.


```diff
baml_src/agent.baml
+// human tools are async requests to a human
+type HumanTools = ClarificationRequest | DoneForNow
+
+class ClarificationRequest {
+  intent "request_more_information" @description("you can request more information from me")
+  message string
+}
+
 class DoneForNow {
   intent "done_for_now"
-  message string 
+
+  message string @description(#"
+    message to send to the user about the work that was done. 
+  "#)
 }
 
   }
 }
+
 function DetermineNextStep(
     thread: string 
-) -> CalculatorTools | DoneForNow {
+) -> HumanTools | CalculatorTools {
     client Qwen3
 
 }
 
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05-agent.baml baml_src/agent.baml

</details>

next, let's re-generate the client code

NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.


    npx baml-cli generate

now, let's update the agent to use the new tool


```diff
src/agent.ts
 }
 
-export async function agentLoop(thread: Thread): Promise<string> {
+export async function agentLoop(thread: Thread): Promise<Thread> {
 
     while (true) {
         switch (nextStep.intent) {
             case "done_for_now":
-                // response to human, return the next step object
-                return nextStep.message;
+            case "request_more_information":
+                // response to human, return the thread
+                return thread;
             case "add":
             case "subtract":
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05-agent.ts src/agent.ts

</details>

next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI


```diff
src/cli.ts
 // cli.ts lets you invoke the agent loop from the command line
 
-import { agentLoop, Thread, Event } from "./agent";
+import { agentLoop, Thread, Event } from "../src/agent";
 
+
+
 export async function cli() {
     // Get command line arguments, skipping the first two (node and script name)
     // Run the agent loop with the thread
     const result = await agentLoop(thread);
-    console.log(result);
+    let lastEvent = result.events.slice(-1)[0];
+
+    while (lastEvent.data.intent === "request_more_information") {
+        const message = await askHuman(lastEvent.data.message);
+        thread.events.push({ type: "human_response", data: message });
+        const result = await agentLoop(thread);
+        lastEvent = result.events.slice(-1)[0];
+    }
+
+    // print the final result
+    // optional - you could loop here too
+    console.log(lastEvent.data.message);
+    process.exit(0);
 }
+
+async function askHuman(message: string) {
+    const readline = require('readline').createInterface({
+        input: process.stdin,
+        output: process.stdout
+    });
+
+    return new Promise((resolve) => {
+        readline.question(`${message}\n> `, (answer: string) => {
+            resolve(answer);
+        });
+    });
+}
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05-cli.ts src/cli.ts

</details>

let's try it out


    npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '

next, let's add a test that checks the agent's ability to handle
a clarification request


```diff
baml_src/agent.baml
 ) -> HumanTools | CalculatorTools {
     client Qwen3
-
     // client "openai/gpt-4o"
 
 
+
+test MathOperationWithClarification {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+      "#
+  }
+  @@assert(intent, {{this.intent == "request_more_information"}})
+}
+
+test MathOperationPostClarification {
+  functions [DetermineNextStep]
+  args {
+    thread #"
+        [
+        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
+        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
+        {"type":"human_response","data":"lets try 12 instead"},
+      ]
+      "#
+  }
+  @@assert(intent, {{this.intent == "multiply"}})
+  @@assert(a, {{this.b == 12}})
+  @@assert(b, {{this.a == 3}})
+}
+        
+
+
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05b-agent.baml baml_src/agent.baml

</details>

and now we can run the tests again


    npx baml-cli test

you'll notice the new test passes, but the hello world test fails

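This is because the hello world test still asserts "done_for_now", while the agent, now that it has a clarification tool, will likely answer a bare greeting with "request_more_information". Let's update the assertion to match: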


```diff
baml_src/agent.baml
     api_key env.BASETEN_API_KEY 
   }
 
 function DetermineNextStep(
 ) -> HumanTools | CalculatorTools {
     client Qwen3
+
     // client "openai/gpt-4o"
 
     "#
   }
-  @@assert(intent, {{this.intent == "done_for_now"}})
+  @@assert(intent, {{this.intent == "request_more_information"}})
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/05c-agent.baml baml_src/agent.baml

</details>

Verify tests pass

    npx baml-cli test

## Chapter 6 - Customize Your Prompt with Reasoning

In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.

this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)

there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)


for this section, it will be helpful to leave the baml logs enabled

    export BAML_LOG=debug

update the agent prompt to include a reasoning step


```diff
baml_src/agent.baml
     api_key env.BASETEN_API_KEY 
   }
 
 function DetermineNextStep(
 
         {{ ctx.output_format }}
+
+        First, always plan out what to do next, for example:
+
+        - ...
+        - ...
+        - ...
+
+        {...} // schema
     "#
 }
   @@assert(b, {{this.a == 3}})
 }
-        
-
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/06-agent.baml baml_src/agent.baml

</details>

generate the updated client

    npx baml-cli generate

now, you can try it out with a simple prompt


    npx tsx src/index.ts 'can you multiply 3 and 4'

you should see output from the baml logs showing the reasoning steps

#### optional challenge

add a field to your tool output format that includes the reasoning steps in the output!


## Chapter 7 - Customize Your Context Window

In this section, we'll explore how to customize the context window
of the agent.

this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)


update the agent to pretty-print the Context window for the model


```diff
src/agent.ts
         // can change this to whatever custom serialization you want to do, XML, etc
         // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
-        return JSON.stringify(this.events);
+        return JSON.stringify(this.events, null, 2);
     }
 }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/07-agent.ts src/agent.ts

</details>

Test the formatting

    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

next, let's update the agent to use XML formatting instead

this is a very popular format for passing data to a model,

among other things, because of the token efficiency of XML.


```diff
src/agent.ts
 
     serializeForLLM() {
-        // can change this to whatever custom serialization you want to do, XML, etc
-        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
-        return JSON.stringify(this.events, null, 2);
+        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
     }
+
+    trimLeadingWhitespace(s: string) {
+        return s.replace(/^[ \t]+/gm, '');
+    }
+
+    serializeOneEvent(e: Event) {
+        return this.trimLeadingWhitespace(`
+            <${e.data?.intent || e.type}>
+            ${
+            typeof e.data !== 'object' ? e.data :
+            Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")}
+            </${e.data?.intent || e.type}>
+        `)
+    }
 }
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/07b-agent.ts src/agent.ts

</details>

let's try it out


    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

let's update our tests to match the new output format


```diff
baml_src/agent.baml
         {{ ctx.output_format }}
 
-        First, always plan out what to do next, for example:
+        Always think about what to do next first, like:
 
         - ...
   args {
     thread #"
-      {
-        "type": "user_input",
-        "data": "hello!"
-      }
+      <user_input>
+        hello!
+      </user_input>
     "#
   }
   args {
     thread #"
-      {
-        "type": "user_input",
-        "data": "can you multiply 3 and 4?"
-      }
+      <user_input>
+        can you multiply 3 and 4?
+      </user_input>
     "#
   }
   args {
     thread #"
-      [
-        {
-          "type": "user_input",
-          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
-        },
-        {
-          "type": "tool_call",
-          "data": {
-            "intent": "multiply",
-            "a": 3,
-            "b": 4
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 12
-        },
-        {
-          "type": "tool_call", 
-          "data": {
-            "intent": "divide",
-            "a": 12,
-            "b": 2
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 6
-        },
-        {
-          "type": "tool_call",
-          "data": {
-            "intent": "add", 
-            "a": 6,
-            "b": 12
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 18
-        }
-      ]
+         <user_input>
+    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
+    </user_input>
+
+
+    <multiply>
+    a: 3
+    b: 4
+    </multiply>
+
+
+    <tool_response>
+    12
+    </tool_response>
+
+
+    <divide>
+    a: 12
+    b: 2
+    </divide>
+
+
+    <tool_response>
+    6
+    </tool_response>
+
+
+    <add>
+    a: 6
+    b: 12
+    </add>
+
+
+    <tool_response>
+    18
+    </tool_response>
+
     "#
   }
   args {
     thread #"
-          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+          <user_input>
+          can you multiply 3 and fe1iiaff10
+          </user_input>
       "#
   }
   args {
     thread #"
-        [
-        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
-        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
-        {"type":"human_response","data":"lets try 12 instead"},
-      ]
+        <user_input>
+        can you multiply 3 and FD*(#F&& ?
+        </user_input>
+
+        <request_more_information>
+        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
+        </request_more_information>
+
+        <human_response>
+        lets try 12 instead
+        </human_response>
       "#
   }
   @@assert(intent, {{this.intent == "multiply"}})
 }
         
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/07c-agent.baml baml_src/agent.baml

</details>

check out the updated tests


    npx baml-cli test

## Chapter 8 - Adding API Endpoints

Add an Express server to expose the agent via HTTP.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install Express and types

    npm install express && npm install --save-dev @types/express supertest

Add the server implementation

    cp ./walkthrough/08-server.ts src/server.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/08-server.ts
import express from 'express';
import { Thread, agentLoop } from '../src/agent';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);
    const result = await agentLoop(thread);
    res.json(result);
});

// GET /thread/:id - Get thread status 
app.get('/thread/:id', (req, res) => {
    // optional - add state
    res.status(404).json({ error: "Not implemented yet" });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };
```

</details>

Start the server

    npx tsx src/server.ts

Test with curl (in another terminal)

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you add 3 and 4"}'

You should get an answer from the agent which includes the
agentic trace, ending in a message like:


    {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}

## Chapter 9 - In-Memory State and Async Clarification

Add state management and async clarification support.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Add some simple in-memory state management for threads

    cp ./walkthrough/09-state.ts src/state.ts

<details>
<summary>show file</summary>

```ts
// ./walkthrough/09-state.ts
import crypto from 'crypto';
import { Thread } from '../src/agent';


// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory registry of threads, keyed by a generated string ID.
 *
 * All state lives in a private `Map` on the instance.
 */
export class ThreadStore {
    // thread ID -> thread
    private threads: Map<string, Thread> = new Map();
    
    /**
     * Stores `thread` under a freshly generated UUID.
     *
     * @param thread the thread to register
     * @returns the ID the thread was stored under
     */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }
    
    /**
     * Looks up a thread by ID.
     *
     * @param id an ID previously returned by `create`
     * @returns the stored thread, or `undefined` when the ID is unknown
     */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }
    
    /**
     * Replaces the thread stored under `id`.
     *
     * Uses `Map.set`, so an ID that was never created is inserted rather
     * than rejected.
     *
     * @param id the ID to write to
     * @param thread the thread to store under that ID
     */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
```

</details>

update the server to use the state management

* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id
* implement POST /thread/:id/response


```diff
src/server.ts
 import express from 'express';
 import { Thread, agentLoop } from '../src/agent';
+import { ThreadStore } from '../src/state';
 
 const app = express();
 app.set('json spaces', 2);
 
+const store = new ThreadStore();
+
 // POST /thread - Start new thread
 app.post('/thread', async (req, res) => {
         data: req.body.message
     }]);
-    const result = await agentLoop(thread);
-    res.json(result);
+    
+    const threadId = store.create(thread);
+    const newThread = await agentLoop(thread);
+    
+    store.update(threadId, newThread);
+
+    const lastEvent = newThread.events[newThread.events.length - 1];
+    // If we exited the loop, include the response URL so the client can
+    // push a new message onto the thread
+    lastEvent.data.response_url = `/thread/${threadId}/response`;
+
+    console.log("returning last event from endpoint", lastEvent);
+
+    res.json({ 
+        thread_id: threadId,
+        ...newThread 
+    });
 });
 
 app.get('/thread/:id', (req, res) => {
-    // optional - add state
-    res.status(404).json({ error: "Not implemented yet" });
+    const thread = store.get(req.params.id);
+    if (!thread) {
+        return res.status(404).json({ error: "Thread not found" });
+    }
+    res.json(thread);
 });
 
+// POST /thread/:id/response - Handle clarification response
+app.post('/thread/:id/response', async (req, res) => {
+    let thread = store.get(req.params.id);
+    if (!thread) {
+        return res.status(404).json({ error: "Thread not found" });
+    }
+    
+    thread.events.push({
+        type: "human_response",
+        data: req.body.message
+    });
+    
+    // loop until stop event
+    const newThread = await agentLoop(thread);
+    
+    store.update(req.params.id, newThread);
+
+    const lastEvent = newThread.events[newThread.events.length - 1];
+    lastEvent.data.response_url = `/thread/${req.params.id}/response`;
+
+    console.log("returning last event from endpoint", lastEvent);
+    
+    res.json(newThread);
+});
+
 const port = process.env.PORT || 3000;
 app.listen(port, () => {
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/09-server.ts src/server.ts

</details>

Start the server

    npx tsx src/server.ts

Test clarification flow

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you multiply 3 and xyz"}'

## Chapter 10 - Adding Human Approval

Add support for human approval of operations.

for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

update the server to handle human approvals

* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong


```diff
src/server.ts
 import express from 'express';
-import { Thread, agentLoop } from '../src/agent';
+import { Thread, agentLoop, handleNextStep } from '../src/agent';
 import { ThreadStore } from '../src/state';
 
 });
 
+
+type ApprovalPayload = {
+    type: "approval";
+    approved: boolean;
+    comment?: string;
+}
+
+type ResponsePayload = {
+    type: "response";
+    response: string;
+}
+
+type Payload = ApprovalPayload | ResponsePayload;
+
 // POST /thread/:id/response - Handle clarification response
 app.post('/thread/:id/response', async (req, res) => {
         return res.status(404).json({ error: "Thread not found" });
     }
+
+    const body: Payload = req.body;
+
+    let lastEvent = thread.events[thread.events.length - 1];
+
+    if (thread.awaitingHumanResponse() && body.type === 'response') {
+        thread.events.push({
+            type: "human_response",
+            data: body.response
+        });
+    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
+        // push feedback onto the thread
+        thread.events.push({
+            type: "tool_response",
+            data: `user denied the operation with feedback: "${body.comment}"`
+        });
+    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
+        // approved, run the tool, pushing results onto the thread
+        await handleNextStep(lastEvent.data, thread);
+    } else {
+        res.status(400).json({
+            error: "Invalid request: " + body.type,
+            awaitingHumanResponse: thread.awaitingHumanResponse(),
+            awaitingHumanApproval: thread.awaitingHumanApproval()
+        });
+        return;
+    }
+
     
-    thread.events.push({
-        type: "human_response",
-        data: req.body.message
-    });
-    
     // loop until stop event
     const newThread = await agentLoop(thread);
     store.update(req.params.id, newThread);
 
-    const lastEvent = newThread.events[newThread.events.length - 1];
+    lastEvent = newThread.events[newThread.events.length - 1];
     lastEvent.data.response_url = `/thread/${req.params.id}/response`;
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/10-server.ts src/server.ts

</details>

Add a few methods to the agent to handle approvals and responses

```diff
src/agent.ts
         `)
     }
+
+    awaitingHumanResponse(): boolean {
+        const lastEvent = this.events[this.events.length - 1];
+        return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent);
+    }
+
+    awaitingHumanApproval(): boolean {
+        const lastEvent = this.events[this.events.length - 1];
+        return lastEvent.data.intent === 'divide';
+    }
 }
 
                 // response to human, return the thread
                 return thread;
+            case "divide":
+                // divide is scary, return it for human approval
+                return thread;
             case "add":
             case "subtract":
             case "multiply":
-            case "divide":
                 thread = await handleNextStep(nextStep, thread);
         }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/10-agent.ts src/agent.ts

</details>

Start the server

    npx tsx src/server.ts

Test division with approval

    curl -X POST http://localhost:3000/thread \
      -H "Content-Type: application/json" \
      -d '{"message":"can you divide 3 by 4"}'

You should see:

    {
      "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
      "events": [
        {
          "type": "user_input",
          "data": "can you divide 3 by 4"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 4,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        }
      ]
    }

reject the request with another curl call, replacing `{thread_id}` with the thread ID from the previous response

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
      -H "Content-Type: application/json" \
      -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'

You should see: the last tool call is now `"intent":"divide","a":3,"b":5`

    {
      "events": [
        {
          "type": "user_input",
          "data": "can you divide 3 by 4"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 4,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        },
        {
          "type": "tool_response",
          "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 3,
            "b": 5,
            "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
          }
        }
      ]
    }

now you can approve the operation

    curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
      -H "Content-Type: application/json" \
      -d '{"type": "approval", "approved": true}'

you should see the final message includes the tool response and final result!

    ...
    {
      "type": "tool_response",
      "data": 0.6
    },
    {
      "type": "done_for_now",
      "message": "I divided 3 by 5 and the result is 0.6. If you have any more operations or queries, feel free to ask!",
      "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
    }

## Chapter 11 - Human Approvals over email

in this section, we'll add support for human approvals over email.

This will start a little bit contrived, just to get the concepts down -

We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI

While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)


for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.

    export BAML_LOG=off

Install HumanLayer

    npm install humanlayer

Update CLI to send `divide` and `request_more_information` to a human via email

```diff
src/cli.ts
 // cli.ts lets you invoke the agent loop from the command line
 
+import { humanlayer } from "humanlayer";
 import { agentLoop, Thread, Event } from "../src/agent";
 
-
-
 export async function cli() {
     // Get command line arguments, skipping the first two (node and script name)
 
     // Run the agent loop with the thread
-    const result = await agentLoop(thread);
-    let lastEvent = result.events.slice(-1)[0];
+    let newThread = await agentLoop(thread);
+    let lastEvent = newThread.events.slice(-1)[0];
 
-    while (lastEvent.data.intent === "request_more_information") {
-        const message = await askHuman(lastEvent.data.message);
-        thread.events.push({ type: "human_response", data: message });
-        const result = await agentLoop(thread);
-        lastEvent = result.events.slice(-1)[0];
+    while (lastEvent.data.intent !== "done_for_now") {
+        const responseEvent = await askHuman(lastEvent);
+        thread.events.push(responseEvent);
+        newThread = await agentLoop(thread);
+        lastEvent = newThread.events.slice(-1)[0];
     }
 
     // print the final result
     console.log(lastEvent.data.message);
     process.exit(0);
 }
 
-async function askHuman(message: string) {
+async function askHuman(lastEvent: Event): Promise<Event> {
+    if (process.env.HUMANLAYER_API_KEY) {
+        return await askHumanEmail(lastEvent);
+    } else {
+        return await askHumanCLI(lastEvent.data.message);
+    }
+}
+
+async function askHumanCLI(message: string): Promise<Event> {
     const readline = require('readline').createInterface({
         input: process.stdin,
     return new Promise((resolve) => {
         readline.question(`${message}\n> `, (answer: string) => {
-            resolve(answer);
+            resolve({ type: "human_response", data: answer });
         });
     });
 }
+
+export async function askHumanEmail(lastEvent: Event): Promise<Event> {
+    if (!process.env.HUMANLAYER_EMAIL) {
+        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+    }
+    const hl = humanlayer({ //reads apiKey from env
+        // name of this agent
+        runId: "12fa-cli-agent",
+        verbose: true,
+        contactChannel: {
+            // agent should request permission via email
+            email: {
+                address: process.env.HUMANLAYER_EMAIL,
+            }
+        }
+    }) 
+
+    if (lastEvent.data.intent === "divide") {
+        // fetch approval synchronously - this will block until reply
+        const response = await hl.fetchHumanApproval({
+            spec: {
+                fn: "divide",
+                kwargs: {
+                    a: lastEvent.data.a,
+                    b: lastEvent.data.b
+                }
+            }
+        })
+
+        if (response.approved) {
+            const result = lastEvent.data.a / lastEvent.data.b;
+            console.log("tool_response", result);
+            return {
+                "type": "tool_response",
+                "data": result
+            };
+        } else {
+            return {
+                "type": "tool_response",
+                "data": `user denied operation ${lastEvent.data.intent}
+                with feedback: ${response.comment}`
+            };
+        }
+    }
+    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
+}
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/11-cli.ts src/cli.ts

</details>

Run the CLI

    npx tsx src/index.ts 'can you divide 4 by 5'

The last lines of the output should mention the human review step

    nextStep { intent: 'divide', a: 4, b: 5 }
    HumanLayer: Requested human approval from HumanLayer cloud

go ahead and respond to the email with some feedback:

![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true)


you should get another email with an updated attempt based on your feedback!

You can go ahead and approve this one:

![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true)


and your final output will look like

    nextStep {
     intent: 'done_for_now',
     message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
    }
    The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!

let's implement the `request_more_information` flow as well


```diff
src/cli.ts
     }) 
 
+    if (lastEvent.data.intent === "request_more_information") {
+        // fetch response synchronously - this will block until reply
+        const response = await hl.fetchHumanResponse({
+            spec: {
+                msg: lastEvent.data.message
+            }
+        })
+        return {
+            "type": "tool_response",
+            "data": response
+        }
+    }
+    
     if (lastEvent.data.intent === "divide") {
         // fetch approval synchronously - this will block until reply
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/11b-cli.ts src/cli.ts

</details>

let's test the `request_more_information` flow by asking for a calculation
with garbled input:


    npx tsx src/index.ts 'can you multiply 4 and xyz'

You should get an email with a request for clarification

    Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?

you can respond with something like

    use 8 instead of xyz

you should see a final result on the CLI like

    I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.

as a final step, let's explore using a custom html template for the email


```diff
src/cli.ts
             email: {
                 address: process.env.HUMANLAYER_EMAIL,
+                // custom email body - jinja
+                template: `{% if type == 'request_more_information' %}
+{{ event.spec.msg }}
+{% else %}
+agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
+with args: {{event.spec.kwargs}}
+<br><br>
+reply to this email to approve
+{% endif %}`
             }
         }
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/11c-cli.ts src/cli.ts

</details>

first try with divide:


    npx tsx src/index.ts 'can you divide 4 by 5'

you should see a slightly different email with the custom template

![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true)

feel free to run with the flow and then you can try updating the template to your liking

(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)

try triggering "request_more_information" as well!


that's it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval


## Chapter XX - HumanLayer Webhook Integration

the previous sections used the humanlayer SDK in "synchronous mode" - that
means every time we wait for human approval, we sit in a loop
polling until the human response is received.

That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results.


add code to initialize humanlayer in the server


```diff
src/server.ts
 import { Thread, agentLoop, handleNextStep } from '../src/agent';
 import { ThreadStore } from '../src/state';
+import { humanlayer } from 'humanlayer';
 
 const app = express();
 const store = new ThreadStore();
 
+const getHumanlayer = () => {
+    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
+    if (!HUMANLAYER_EMAIL) {
+        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+    }
+
+    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
+    if (!HUMANLAYER_API_KEY) {
+        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
+    }
+    return humanlayer({
+        runId: `12fa-agent`,
+        contactChannel: {
+            email: { address: HUMANLAYER_EMAIL }
+        }
+    });
+}
+
 // POST /thread - Start new thread
 app.post('/thread', async (req, res) => {
     
     // loop until stop event
-    const newThread = await agentLoop(thread);
+    const result = await agentLoop(thread);
 
-    store.update(req.params.id, newThread);
+    store.update(req.params.id, result);
 
-    lastEvent = newThread.events[newThread.events.length - 1];
+    lastEvent = result.events[result.events.length - 1];
     lastEvent.data.response_url = `/thread/${req.params.id}/response`;
 
     console.log("returning last event from endpoint", lastEvent);
     
-    res.json(newThread);
+    res.json(result);
 });
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/12-1-server-init.ts src/server.ts

</details>

next, let's update the /thread endpoint to

1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls


Update the server to be able to handle request_more_information responses

- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response


```diff
src/server.ts
-import express from 'express';
+import express, { Request, Response } from 'express';
 import { Thread, agentLoop, handleNextStep } from '../src/agent';
 import { ThreadStore } from '../src/state';
-import { humanlayer } from 'humanlayer';
+import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
 
 const app = express();
     });
 }
-
 // POST /thread - Start new thread
-app.post('/thread', async (req, res) => {
+app.post('/thread', async (req: Request, res: Response) => {
     const thread = new Thread([{
         type: "user_input",
     }]);
     
-    const threadId = store.create(thread);
-    const newThread = await agentLoop(thread);
-    
-    store.update(threadId, newThread);
+    // run agent loop asynchronously, return immediately
+    Promise.resolve().then(async () => {
+        const threadId = store.create(thread);
+        const newThread = await agentLoop(thread);
+        
+        store.update(threadId, newThread);
 
-    const lastEvent = newThread.events[newThread.events.length - 1];
-    // If we exited the loop, include the response URL so the client can
-    // push a new message onto the thread
-    lastEvent.data.response_url = `/thread/${threadId}/response`;
+        const lastEvent = newThread.events[newThread.events.length - 1];
 
-    console.log("returning last event from endpoint", lastEvent);
-
-    res.json({ 
-        thread_id: threadId,
-        ...newThread 
+        if (thread.awaitingHumanResponse()) {
+            const hl = getHumanlayer();
+            // create a human contact - returns immediately
+            hl.createHumanContact({
+                spec: {
+                    msg: lastEvent.data.message,
+                    state: {
+                        thread_id: threadId,
+                    }
+                }
+            });
+        }
     });
+
+    res.json({ status: "processing" });
 });
 
 // GET /thread/:id - Get thread status
-app.get('/thread/:id', (req, res) => {
+app.get('/thread/:id', (req: Request, res: Response) => {
     const thread = store.get(req.params.id);
     if (!thread) {
 });
 
+type WebhookResponse = V1Beta2HumanContactCompleted;
 
-type ApprovalPayload = {
-    type: "approval";
-    approved: boolean;
-    comment?: string;
-}
+const handleHumanResponse = async (req: Request, res: Response) => {
 
-type ResponsePayload = {
-    type: "response";
-    response: string;
 }
 
-type Payload = ApprovalPayload | ResponsePayload;
+app.post('/webhook', async (req: Request, res: Response) => {
+    console.log("webhook response", req.body);
+    const response = req.body as WebhookResponse;
 
-// POST /thread/:id/response - Handle clarification response
-app.post('/thread/:id/response', async (req, res) => {
-    let thread = store.get(req.params.id);
+    // response is guaranteed to be set on a webhook
+    const humanResponse: string = response.event.status?.response as string;
+
+    const threadId = response.event.spec.state?.thread_id;
+    if (!threadId) {
+        return res.status(400).json({ error: "Thread ID not found" });
+    }
+
+    const thread = store.get(threadId);
     if (!thread) {
         return res.status(404).json({ error: "Thread not found" });
     }
 
-    const body: Payload = req.body;
-
-    let lastEvent = thread.events[thread.events.length - 1];
-
-    if (thread.awaitingHumanResponse() && body.type === 'response') {
-        thread.events.push({
-            type: "human_response",
-            data: body.response
-        });
-    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
-        // push feedback onto the thread
-        thread.events.push({
-            type: "tool_response",
-            data: `user denied the operation with feedback: "${body.comment}"`
-        });
-    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
-        // approved, run the tool, pushing results onto the thread
-        await handleNextStep(lastEvent.data, thread);
-    } else {
-        res.status(400).json({
-            error: "Invalid request: " + body.type,
-            awaitingHumanResponse: thread.awaitingHumanResponse(),
-            awaitingHumanApproval: thread.awaitingHumanApproval()
-        });
-        return;
+    if (!thread.awaitingHumanResponse()) {
+        return res.status(400).json({ error: "Thread is not awaiting human response" });
     }
 
-    
-    // loop until stop event
-    const result = await agentLoop(thread);
-
-    store.update(req.params.id, result);
-
-    lastEvent = result.events[result.events.length - 1];
-    lastEvent.data.response_url = `/thread/${req.params.id}/response`;
-
-    console.log("returning last event from endpoint", lastEvent);
-    
-    res.json(result);
 });
 
```

<details>
<summary>skip this step</summary>

    cp ./walkthrough/12a-server.ts src/server.ts

</details>

Start the server in another terminal

    npx tsx src/server.ts

now that the server is running, send a payload to the '/thread' endpoint


__ do the response step

__ now handle approvals for divide

__ now also handle done_for_now


================================================
FILE: workshops/2025-05-17/walkthrough.yaml
================================================
title: "Building the 12-factor agent template from scratch"
text: "Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology."

targets:
  - markdown: "./build/walkthrough.md"
    onChange:
      diff: true
      cp: true
    newFiles:
      cat: false
      cp: true
  - folders:
      path: "./build/sections"
      skip:
        - "cleanup"
      final:
        dirName: "final"

sections:
  - name: cleanup
    title: "Cleanup"
    text: "Make sure you're starting from a clean slate"
    steps:
      - text: "Clean up existing files"
        command: |
          rm -rf baml_src/ && rm -rf src/

  - name: hello-world
    title: "Chapter 0 - Hello World"
    text: "Let's start with a basic TypeScript setup and a hello world program."
    steps:
      - text: |
          This guide is written in TypeScript (yes, a python version is coming soon)

          There are many checkpoints between every file edit in the workshop steps,
          so even if you aren't super familiar with typescript,
          you should be able to keep up and run each example.

          To run this guide, you'll need a relatively recent version of nodejs and npm installed

          You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine

        command:
            brew install node@20
        results:
          - text: "You should see the node version"
            code: |
              node --version

      - text: "Copy initial package.json"
        file: {src: ./walkthrough/00-package.json, dest: package.json}
      - text: "Install dependencies"
        command: |
          npm install
        incremental: true
      - text: "Copy tsconfig.json"
        file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json}
      - text: "add .gitignore"
        file: {src: ./walkthrough/00-.gitignore, dest: .gitignore}
      - text: "Create src folder"
        dir: {create: true, path: src}
      - text: "Add a simple hello world index.ts"
        file: {src: ./walkthrough/00-index.ts, dest: src/index.ts}
      - text: "Run it to verify"
        command: |
          npx tsx src/index.ts
        results:
          - text: "You should see:"
            code: |
              hello, world!

  - name: cli-and-agent
    title: "Chapter 1 - CLI and Agent Loop"
    text: "Now let's add BAML and create our first agent with a CLI interface."
    steps:
      - text: |
           First, we'll need to install [BAML](https://github.com/boundaryml/baml)
           which is a tool for prompting and structured outputs.
        command: |
          npm install @boundaryml/baml
        incremental: true
      - text: "Initialize BAML"
        command: |
          npx baml-cli init
        incremental: true
      - text: "Remove default resume.baml"
        command: |
          rm baml_src/resume.baml
        incremental: true
      - text: "Add our starter agent, a single baml prompt that we'll build on"
        file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
      - text: "Generate BAML client code"
        command: |
          npx baml-cli generate
        incremental: true
      - text: "Enable BAML logging for this section"
        command: |
          export BAML_LOG=debug
      - text: "Add the CLI interface"
        file: {src: ./walkthrough/01-cli.ts, dest: src/cli.ts}
      - text: "Update index.ts to use the CLI"
        file: {src: ./walkthrough/01-index.ts, dest: src/index.ts}
      - text: "Add the agent implementation"
        file: {src: ./walkthrough/01-agent.ts, dest: src/agent.ts}
      - text: |
          The BAML code is configured to use BASETEN_API_KEY by default

          To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co),
          and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/).

          ```rust 
            function DetermineNextStep(thread: string) -> DoneForNow {
                client Qwen3
                // ...
          ```

          If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key.

          If you want to try swapping out the model, you can change the `client` line.

          [Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)

          For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) 
          or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.
          
          For example, to use openai with an OPENAI_API_KEY, you can do:

              client "openai/gpt-4o"
          
      - text: Set your env vars
        command: |
          export BASETEN_API_KEY=...
          export BASETEN_BASE_URL=...
      - text: "Try it out"
        command: |
          npx tsx src/index.ts hello
        results:
          - text: you should see a familiar response from the model 
            code: |
              {
                intent: 'done_for_now',
                message: 'Hello! How can I assist you today?'
              }

  - name: calculator-tools
    title: "Chapter 2 - Add Calculator Tools"
    text: "Let's add some calculator tools to our agent."
    steps:
      - text: |
          Let's start by adding a tool definition for the calculator

          These are simple structured outputs that we'll ask the model to
          return as a "next step" in the agentic loop.

        file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml}
      - text: |
          Now, let's update the agent's DetermineNextStep method to
          expose the calculator tools as potential next steps

        file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml}
      - text: "Generate updated BAML client"
        command: |
          npx baml-cli generate
        incremental: true
      - text: "Try out the calculator"
        command: |
          npx tsx src/index.ts 'can you add 3 and 4'
        results:
          - text: "You should see a tool call to the calculator"
            code: |
              {
                intent: 'add',
                a: 3,
                b: 4
              }

  - name: tool-loop
    title: "Chapter 3 - Process Tool Calls in a Loop"
    text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
    steps:
      - text: |
          First, let's update the agent to handle the tool call
        file: {src: ./walkthrough/03-agent.ts, dest: src/agent.ts}
      - text: |
          Now, let's try it out
        command: |
          npx tsx src/index.ts 'can you add 3 and 4'
        results:
          - text: you should see the agent call the tool and then return the result
            code: |
              {
                intent: 'done_for_now',
                message: 'The sum of 3 and 4 is 7.'
              }
      - text: "For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output"
        command: |
          export BAML_LOG=off
      - text: "Try a multi-step calculation"
        command: |
          npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'
      - text: "you'll notice that tools like multiply and divide are not available"
        command: |
          npx tsx src/index.ts 'can you multiply 3 and 4'
      - text: |
          next, let's add handlers for the rest of the calculator tools
        file: {src: ./walkthrough/03b-agent.ts, dest: src/agent.ts}
      - text: "Test subtraction"
        command: |
          npx tsx src/index.ts 'can you subtract 3 from 4'
      - text: |
          now, let's test the multiplication tool
        command: |
          npx tsx src/index.ts 'can you multiply 3 and 4'
      - text: |
          finally, let's test a more complex calculation with multiple operations
        command: |
          npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
      - text: |
          congratulations, you've taken your first step into hand-rolling an agent loop.

          from here, we're going to start incorporating some more intermediate and advanced
          concepts for 12-factor agents.

  - name: baml-tests
    title: "Chapter 4 - Add Tests to agent.baml"
    text: "Let's add some tests to our BAML agent."
    steps:
      - text: to start, leave the baml logs enabled
        command: |
          export BAML_LOG=debug
      - text: |
          next, let's add some tests to the agent

          We'll start with a simple test that checks the agent's ability to handle
          a basic calculation.

        file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml}
      - text: "Run the tests"
        command: |
          npx baml-cli test
      - text: |
          now, let's improve the test with assertions!

          Assertions are a great way to make sure the agent is working as expected,
          and can easily be extended to check for more complex behavior.

        file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml}
      - text: "Run the tests"
        command: |
          npx baml-cli test
      - text: |
          as you add more tests, you can disable the logs to keep the output clean.
          You may want to turn them on as you iterate on specific tests.
        command: |
          export BAML_LOG=off
      - text: |
          now, let's add some more complex test cases,
          where we resume from in the middle of an in-progress
          agentic context window


        file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml}
      - text: |
          let's try to run it
        command: |
          npx baml-cli test

  - name: human-tools
    title: "Chapter 5 - Multiple Human Tools"
    text: |
      In this section, we'll add support for multiple tools that serve to
      contact humans.
    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: |
          first, let's add a tool that can request clarification from a human

          this will be different from the "done_for_now" tool,
          and can be used to more flexibly handle different types of human interactions
          in your agent.

        file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml}
      - text: |
          next, let's re-generate the client code

          NOTE - if you're using the VSCode extension for BAML,
          the client will be regenerated automatically when you save the file
          in your editor.

        command: |
          npx baml-cli generate
        incremental: true
      - text: |
          now, let's update the agent to use the new tool

        file: {src: ./walkthrough/05-agent.ts, dest: src/agent.ts}
      - text: |
          next, let's update the CLI to handle clarification requests
          by requesting input from the user on the CLI

        file: {src: ./walkthrough/05-cli.ts, dest: src/cli.ts}
      - text: |
          let's try it out

        command: |
          npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '
      - text: |
          next, let's add a test that checks the agent's ability to handle
          a clarification request

        file: {src: ./walkthrough/05b-agent.baml, dest: baml_src/agent.baml}
      - text: |
          and now we can run the tests again
        command: |
          npx baml-cli test
      - text: |
          you'll notice the new test passes, but the hello world test fails

          This is because the agent's default behavior is to return "done_for_now"

        file: {src: ./walkthrough/05c-agent.baml, dest: baml_src/agent.baml}
      - text: "Verify tests pass"
        command: |
          npx baml-cli test

  - name: customize-prompt
    title: "Chapter 6 - Customize Your Prompt with Reasoning"
    text: |
      In this section, we'll explore how to customize the prompt of the agent
      with reasoning steps.

      this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)

      there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)

    steps:
      - text: "for this section, it will be helpful to leave the baml logs enabled"
        command: |
          export BAML_LOG=debug
      - text: |
          update the agent prompt to include a reasoning step
        file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
      - text: generate the updated client
        command: |
          npx baml-cli generate
        incremental: true
      - text: |
          now, you can try it out with a simple prompt
        command: |
          npx tsx src/index.ts 'can you multiply 3 and 4'
        results:
          - text: you should see output from the baml logs showing the reasoning steps
      - text: |
           #### optional challenge

           add a field to your tool output format that includes the reasoning steps in the output!

  - name: context-window
    title: "Chapter 7 - Customize Your Context Window"
    text: |
      In this section, we'll explore how to customize the context window
      of the agent.

      this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)

    steps:
      - text: |
          update the agent to pretty-print the Context window for the model
        file: {src: ./walkthrough/07-agent.ts, dest: src/agent.ts}
      - text: "Test the formatting"
        command: |
          BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
      - text: |
          next, let's update the agent to use XML formatting instead

          this is a very popular format for passing data to a model,

          among other things, because of the token efficiency of XML.

        file: {src: ./walkthrough/07b-agent.ts, dest: src/agent.ts}
      - text: |
          let's try it out
        command: |
          BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
      - text: |
          lets update our tests to match the new output format
        file: {src: ./walkthrough/07c-agent.baml, dest: baml_src/agent.baml}
      - text: |
          check out the updated tests
        command: |
          npx baml-cli test

  - name: api-endpoints
    title: "Chapter 8 - Adding API Endpoints"
    text: "Add an Express server to expose the agent via HTTP."
    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: "Install Express and types"
        command: |
          npm install express && npm install --save-dev @types/express supertest
        incremental: true
      - text: "Add the server implementation"
        file: {src: ./walkthrough/08-server.ts, dest: src/server.ts}
      - text: "Start the server"
        command: |
          npx tsx src/server.ts
      - text: "Test with curl (in another terminal)"
        command: |
          curl -X POST http://localhost:3000/thread \
            -H "Content-Type: application/json" \
            -d '{"message":"can you add 3 and 4"}'
        results:
          - text: |
             You should get an answer from the agent which includes the
             agentic trace, ending in a message like:

            code: |
              {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}

  - name: state-management
    title: "Chapter 9 - In-Memory State and Async Clarification"
    text: "Add state management and async clarification support."
    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: "Add some simple in-memory state management for threads"
        file: {src: ./walkthrough/09-state.ts, dest: src/state.ts}
      - text: |
          update the server to use the state management

          * Add thread state management using `ThreadStore`
          * return thread IDs and response URLs from the /thread endpoint
          * implement GET /thread/:id
          * implement POST /thread/:id/response
        file: {src: ./walkthrough/09-server.ts, dest: src/server.ts}
      - text: "Start the server"
        command: |
          npx tsx src/server.ts
      - text: "Test clarification flow"
        command: |
          curl -X POST http://localhost:3000/thread \
            -H "Content-Type: application/json" \
            -d '{"message":"can you multiply 3 and xyz"}'

  - name: human-approval
    title: "Chapter 10 - Adding Human Approval"
    text: "Add support for human approval of operations."
    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: |
          update the server to handle human approvals

          * Import `handleNextStep` to execute approved actions
          * Add two payload types to distinguish approvals from responses
          * Handle responses and approvals differently in the endpoint
          * Show better error messages when things go wrong

        file: {src: ./walkthrough/10-server.ts, dest: src/server.ts}
      - text: "Add a few methods to the agent to handle approvals and responses"
        file: {src: ./walkthrough/10-agent.ts, dest: src/agent.ts}
      - text: "Start the server"
        command: |
          npx tsx src/server.ts
      - text: "Test division with approval"
        command: |
          curl -X POST http://localhost:3000/thread \
            -H "Content-Type: application/json" \
            -d '{"message":"can you divide 3 by 4"}'
        results:
          - text: "You should see:"
            code: |
              {
                "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
                "events": [
                  {
                    "type": "user_input",
                    "data": "can you divide 3 by 4"
                  },
                  {
                    "type": "tool_call",
                    "data": {
                      "intent": "divide",
                      "a": 3,
                      "b": 4,
                      "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
                    }
                  }
                ]
              }
      - text: "reject the request with another curl call, changing the thread ID"
        command: |
          curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
            -H "Content-Type: application/json" \
            -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'
        results:
          - text: 'You should see: the last tool call is now `"intent":"divide","a":3,"b":5`'
            code: |
              {
                "events": [
                  {
                    "type": "user_input",
                    "data": "can you divide 3 by 4"
                  },
                  {
                    "type": "tool_call",
                    "data": {
                      "intent": "divide",
                      "a": 3,
                      "b": 4,
                      "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
                    }
                  },
                  {
                    "type": "tool_response",
                    "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
                  },
                  {
                    "type": "tool_call",
                    "data": {
                      "intent": "divide",
                      "a": 3,
                      "b": 5,
                      "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
                    }
                  }
                ]
              }
      - text: "now you can approve the operation"
        command: |
          curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
            -H "Content-Type: application/json" \
            -d '{"type": "approval", "approved": true}'
        results:
          - text: "you should see the final message includes the tool response and final result!"
            code: |
              ...
              {
                "type": "tool_response",
                "data": 0.5
              },
              {
                "type": "done_for_now",
                "message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!",
                "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
              }

  - name: humanlayer-approval
    title: "Chapter 11 - Human Approvals over email"
    text: |
      in this section, we'll add support for human approvals over email.

      This will start a little bit contrived, just to get the concepts down -

      We'll start by invoking the workflow from the CLI but approvals for `divide`
      and `request_more_information` will be handled over email,
      then the final `done_for_now` answer will be printed back to the CLI

      While contrived, this is a great example of the flexibility you get from
      [factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)

    steps:
      - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
        command: |
          export BAML_LOG=off
      - text: "Install HumanLayer"
        command: |
          npm install humanlayer
        incremental: true
      - text: "Update CLI to send `divide` and `request_more_information` to a human via email"
        file: {src: ./walkthrough/11-cli.ts, dest: src/cli.ts}
      - text: "Run the CLI"
        command: |
          npx tsx src/index.ts 'can you divide 4 by 5'
        results:
          - text: "The last line of your program should mention human review step"
            code: |
              nextStep { intent: 'divide', a: 4, b: 5 }
              HumanLayer: Requested human approval from HumanLayer cloud
      - text: |
          go ahead and respond to the email with some feedback:

          ![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true)
      - text: |
          you should get another email with an updated attempt based on your feedback!

          You can go ahead and approve this one:

          ![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true)
        results:
          - text: and your final output will look like
            code: |
              nextStep {
               intent: 'done_for_now',
               message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
              }
              The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!
      - text: |
          lets implement the `request_more_information` flow as well
        file: {src: ./walkthrough/11b-cli.ts, dest: src/cli.ts}
      - text: |
          let's test the `request_more_information` flow by asking for a calculation
          with garbled input:
        command: |
          npx tsx src/index.ts 'can you multiply 4 and xyz'
      - text: "You should get an email with a request for clarification"
        command: |
          Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?
      - text: you can respond with something like
        command: |
          use 8 instead of xyz
        results:
          - text: you should see a final result on the CLI like
            code: |
              I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.
      - text: |
          as a final step, lets explore using a custom html template for the email
        file: {src: ./walkthrough/11c-cli.ts, dest: src/cli.ts}
      - text: |
          first try with divide:
        command: |
          npx tsx src/index.ts 'can you divide 4 by 5'
        results:
          - text: |
              you should see a slightly different email with the custom template

              ![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true)

              feel free to run with the flow and then you can try updating the template to your liking

              (if you're using cursor, something as simple as highlighting the template and asking to "make it better"
              should do the trick)

              try triggering "request_more_information" as well!
      - text: |
          thats it - in the next chapter, we'll build a fully email-driven
          workflow agent that uses webhooks for human approval

  - name: humanlayer-webhook
    title: "Chapter XX - HumanLayer Webhook Integration"
    text: |
      the previous sections used the humanlayer SDK in "synchronous mode" - that
      means every time we wait for human approval, we sit in a loop
      polling until the human response is received.

      That's obviously not ideal, especially for production workloads,
      so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
      by updating the server to end processing after contacting a human, and use webhooks to receive the results.

    steps:
      - text: |
          add code to initialize humanlayer in the server
        file: {src: ./walkthrough/12-1-server-init.ts, dest: src/server.ts}
      - text: |
          next, lets update the /thread endpoint to

          1. handle requests asynchronously, returning immediately
          2. create a human contact on request_more_information and done_for_now calls

        # file: {src: }
      - text: |
          Update the server to be able to handle request_clarification responses

          - remove the old /response endpoint and types
          - update the /thread endpoint to run processing asynchronously, return immediately
          - send a state.threadId when requesting human responses
          - add a handleHumanResponse function to process the human response
          - add a /webhook endpoint to handle the webhook response

        file: {src: ./walkthrough/12a-server.ts, dest: src/server.ts}
      - text: "Start the server in another terminal"
        command: |
          npx tsx src/server.ts
      - text: |
          now that the server is running, send a payload to the '/thread' endpoint
      - text: __ do the response step
      - text: __ now handle approvals for divide
      - text: __ now also handle done_for_now

================================================
FILE: workshops/2025-07-16/.gitignore
================================================
baml_src/*.baml
src/*.ts
package.json
package-lock.json
tsconfig.json
build/
tmp/


================================================
FILE: workshops/2025-07-16/CLAUDE.md
================================================
# Workshop 2025-07-16: Python/Jupyter Notebook Implementation

• **Main Tool**: `walkthroughgen_py.py` - Converts TypeScript walkthrough to Jupyter notebooks
• **Config**: `walkthrough.yaml` - Defines notebook structure and content
• **Output**: `workshop_final.ipynb` - Generated notebook with Chapters 0-7
• **Testing**: `test_notebook_colab_sim.sh` - Simulates Google Colab environment

## Key Implementation Learnings

• **No async/await in notebooks** - All BAML calls must be synchronous, remove all async patterns
• **No sys.argv** - Main functions accept parameters directly: `main("hello")` not command line args
• **Global namespace** - Functions defined in cells persist globally, no module imports between cells
• **BAML setup is optional** - Use `baml_setup: true` step only when introducing BAML (Chapter 1+)
• **get_baml_client() pattern** - Required workaround for Google Colab import cache issues
• **BAML files from GitHub** - Fetch with curl since Colab can't display local BAML files
• **Regenerate BAML** - Use `regenerate_baml: true` in run_main when BAML files change
• **Import removal** - Remove `from baml_client import get_baml_client` imports from Python files
• **IN_COLAB detection** - Use try/except on google.colab import to detect environment
• **Human input handling** - get_human_input() uses real input() in Colab, auto-responses locally

## Implementation Patterns

• **walkthroughgen_py.py enhancements** - Added kwargs support for run_main steps
• **Test simulation** - test_notebook_colab_sim.sh creates clean venv with all dependencies
• **Debug artifacts** - Test runs preserved in ./tmp/test_TIMESTAMP/ directories
• **BAML test support** - baml-cli test works fine in notebooks, contrary to initial assumption
• **Tool execution** - All calculator operations (add/subtract/multiply/divide) in agent loop
• **Clarification flow** - ClarificationRequest tool for handling ambiguous inputs
• **Serialization formats** - JSON vs XML for thread history (XML more token-efficient)
• **Progressive complexity** - Start with hello world, gradually add BAML, tools, loops, tests

## Chapter Implementation Status

• **Chapter 0**: Hello World - Simple Python program, no BAML ✅
• **Chapter 1**: CLI and Agent - BAML introduction, basic agent ✅
• **Chapter 2**: Calculator Tools - Tool definitions without execution ✅
• **Chapter 3**: Tool Loop - Full agent loop with tool execution ✅
• **Chapter 4**: BAML Tests - Test cases with assertions ✅
• **Chapter 5**: Human Tools - Clarification requests with input handling ✅
• **Chapter 6**: Improved Prompting - Reasoning steps in prompts ✅
• **Chapter 7**: Context Serialization - JSON/XML thread formats ✅
• **Chapters 8-12**: Skipped - Server-based features not suitable for notebooks ⚠️

## Common Pitfalls Avoided

• **Import errors** - baml_client imports fail in notebooks, use global get_baml_client
• **Async patterns** - Notebooks can't handle async/await, everything must be sync
• **File paths** - Use absolute paths from notebook directory, handle ./ prefixes
• **BAML file conflicts** - Each chapter updates same files (agent.baml) not chapter-specific
• **Tool registration** - Ensure all tool types handled in agent loop switch statement
• **Test expectations** - BAML tests may have varying outputs, assertions verify key properties
• **Environment differences** - Code must work in both Colab and local testing environments

## Testing Commands

• Generate notebook: `uv run python walkthroughgen_py.py walkthrough.yaml -o test.ipynb`
• Full Colab sim: `./test_notebook_colab_sim.sh`
• Run BAML tests: `baml-cli test` (from directory with baml_src)

## File Structure

• `walkthrough/*.py` - Python implementations of each chapter's code
• `walkthrough/*.baml` - BAML files fetched from GitHub during notebook execution
• `walkthroughgen_py.py` - Main conversion tool
• `walkthrough.yaml` - Notebook definition with all chapters
• `test_notebook_colab_sim.sh` - Full Colab environment simulation
• `workshop_final.ipynb` - Final generated notebook ready for workshop


================================================
FILE: workshops/2025-07-16/hack/analyze_log_capture.py
================================================
#!/usr/bin/env python3
"""
Analyze notebook for BAML log capture success/failure
"""
import json
import sys
import os

def check_logs(notebook_path):
    """Check if BAML logs were captured in the notebook.

    Only code cells that have an ``outputs`` entry and whose source mentions
    ``run_with_baml_logs`` are treated as log-capture test cells. Every
    ``stream`` output of such a cell is searched for the BAML parsed-response
    marker; progress is reported on stdout as matches are found.

    Args:
        notebook_path: Path to the ``.ipynb`` file to analyze.

    Returns:
        A ``(found_capture_test, found_log_pattern)`` tuple of booleans.
        Both are ``False`` when the notebook file does not exist.
    """

    if not os.path.exists(notebook_path):
        print(f"❌ Notebook not found: {notebook_path}")
        return False, False

    # Notebook files are UTF-8 JSON; be explicit so the read does not depend
    # on the platform's locale encoding (outputs routinely contain emoji).
    with open(notebook_path, encoding='utf-8') as f:
        nb = json.load(f)

    found_log_pattern = False
    found_capture_test = False

    for i, cell in enumerate(nb['cells']):
        if cell['cell_type'] == 'code' and 'outputs' in cell:
            # Check if this is a log capture test cell
            source = ''.join(cell.get('source', []))
            if 'run_with_baml_logs' in source:
                found_capture_test = True
                print(f'Found log capture test in cell {i}')

                # Check outputs for BAML logs
                for output in cell['outputs']:
                    if output.get('output_type') == 'stream' and 'text' in output:
                        text = ''.join(output['text'])
                        # Look for the specific BAML log pattern
                        if '---Parsed Response (class DoneForNow)---' in text:
                            found_log_pattern = True
                            print(f'✅ FOUND BAML LOG PATTERN in cell {i} output!')
                            log_lines = [line for line in text.split('\n') if 'Parsed Response' in line]
                            if log_lines:
                                print(f'Log excerpt: {log_lines[0]}')

                        # Also check for our test markers
                        if 'Captured BAML Logs' in text:
                            print(f'Found "Captured BAML Logs" section in cell {i}')
                        if 'No BAML Logs Captured' in text:
                            print(f'Found "No BAML Logs Captured" section in cell {i}')

    return found_capture_test, found_log_pattern

def main():
    """CLI entry point: analyze one notebook and exit with a pass/fail status.

    Expects exactly one command-line argument, the notebook path. Exits with
    status 0 when the BAML log pattern was found in a log-capture test cell,
    and with status 1 on a usage error, when no log-capture test cell exists,
    or when the pattern is absent.
    """
    argv = sys.argv
    if len(argv) != 2:
        print("Usage: python analyze_log_capture.py <notebook_path>")
        sys.exit(1)

    has_capture_test, has_log_pattern = check_logs(argv[1])

    # Guard clauses: handle each failure mode first, success falls through.
    if not has_capture_test:
        print('❌ FAIL: No log capture test found in notebook')
        sys.exit(1)

    if not has_log_pattern:
        print('❌ FAIL: BAML log pattern not found in captured output')
        print('This means the log capture method is NOT working')
        sys.exit(1)

    print('✅ PASS: BAML logs successfully captured in notebook output!')
    sys.exit(0)

if __name__ == '__main__':
    main()

================================================
FILE: workshops/2025-07-16/hack/inspect_notebook.py
================================================
#!/usr/bin/env python3
"""
Utility to inspect notebook cell outputs for debugging
"""
import json
import sys
import os

def inspect_notebook(notebook_path, filter_keyword=None):
    """Print a debugging summary of a notebook's code cells and their outputs.

    For every code cell (optionally only those whose source contains
    ``filter_keyword``, compared case-insensitively) this prints a preview of
    the source, then for each output its type, a short text preview, any
    notable keywords found in the text, the keys of rich ``data`` payloads,
    and error details for ``error`` outputs.

    Args:
        notebook_path: Path to the ``.ipynb`` file to inspect.
        filter_keyword: Optional substring; code cells whose source does not
            contain it are skipped. ``None`` or empty means no filtering.

    Returns:
        None. All results are written to stdout. If the notebook file does
        not exist, a message is printed and the function returns early.
    """

    if not os.path.exists(notebook_path):
        print(f"❌ Notebook not found: {notebook_path}")
        return

    # Notebook files are UTF-8 JSON; be explicit so the read does not depend
    # on the platform's locale encoding.
    with open(notebook_path, encoding='utf-8') as f:
        nb = json.load(f)

    print(f"📓 Inspecting notebook: {notebook_path}")
    print(f"📊 Total cells: {len(nb['cells'])}")
    print("=" * 60)

    for i, cell in enumerate(nb['cells']):
        if cell['cell_type'] == 'code':
            source = ''.join(cell.get('source', []))

            # Filter by keyword if provided
            if filter_keyword and filter_keyword.lower() not in source.lower():
                continue

            print(f"\n🔍 CELL {i} (code)")
            print("📝 SOURCE:")
            print(source[:300] + "..." if len(source) > 300 else source)

            if 'outputs' in cell and cell['outputs']:
                print(f"\n📤 OUTPUTS ({len(cell['outputs'])} outputs):")
                for j, output in enumerate(cell['outputs']):
                    output_type = output.get('output_type', 'unknown')
                    print(f"  Output {j}: type={output_type}")

                    if 'text' in output:
                        text = ''.join(output['text'])
                        print(f"    Text length: {len(text)} chars")

                        # Show first few lines for context
                        lines = text.split('\n')[:5]
                        for line in lines:
                            if line.strip():
                                # Only mark the preview as truncated when the
                                # line really was cut at 80 characters.
                                ellipsis = "..." if len(line) > 80 else ""
                                print(f"    > {line[:80]}{ellipsis}")

                        # Check for interesting patterns
                        patterns = ['BAML', 'Parsed', 'Response', 'Error', 'Exception']
                        found_patterns = [p for p in patterns if p in text]
                        if found_patterns:
                            print(f"    🎯 Found patterns: {found_patterns}")

                    elif 'data' in output:
                        data_keys = list(output['data'].keys())
                        print(f"    Data keys: {data_keys}")

                    # Check for execution errors
                    if output_type == 'error':
                        print(f"    ❌ ERROR: {output.get('ename', 'Unknown')}")
                        print(f"    💬 Message: {output.get('evalue', 'No message')}")
                        if 'traceback' in output:
                            print(f"    📍 Traceback: {len(output['traceback'])} lines")
                            # Show last few lines of traceback
                            for line in output['traceback'][-3:]:
                                print(f"    🔍 {line.strip()}")

            else:
                print("\n📤 No outputs")

            print("-" * 40)

def main():
    """Command-line entry point for the notebook inspector.

    Reads the notebook path from the first command-line argument and an
    optional filter keyword from the second, then hands both to
    ``inspect_notebook``. When no notebook path is supplied, prints a usage
    line and exits with status 1.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python inspect_notebook.py <notebook_path> [filter_keyword]")
        sys.exit(1)

    notebook_path = cli_args[0]
    # The keyword is optional; None is passed through when it is absent.
    filter_keyword = cli_args[1] if len(cli_args) > 1 else None

    inspect_notebook(notebook_path, filter_keyword)

if __name__ == '__main__':
    main()

================================================
FILE: workshops/2025-07-16/hack/minimal_test.ipynb
================================================
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "import sys\n",
        "print(\"Hello stdout!\")\n",
        "print(\"Hello stderr!\", file=sys.stderr)\n",
        "with open(\"test_output.txt\", \"w\") as f:\n",
        "    f.write(\"Notebook executed successfully!\\n\")\n",
        "print(\"✅ Test complete\")"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.8.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}

================================================
FILE: workshops/2025-07-16/hack/test_log_capture.sh
================================================
#!/bin/bash
# End-to-end check that BAML logs show up in executed notebook output.
#
# Steps: generate a notebook from simple_log_test.yaml, execute it with
# test_notebook_colab_sim.sh, then inspect and analyze the executed copy.
# All paths are relative, so run this from the directory in which the helper
# scripts it invokes can be resolved.
set -e

GENERATED_NOTEBOOK="test_capture.ipynb"

# Remove the generated notebook on every exit path. With `set -e` any failing
# step aborts the script, so cleanup placed only at the end would be skipped.
cleanup() {
    echo "🧹 Cleaning up..."
    rm -f "$GENERATED_NOTEBOOK"
}
trap cleanup EXIT

echo "🧪 Testing BAML Log Capture..."

# Clean up any previous test
rm -f "$GENERATED_NOTEBOOK"
rm -rf tmp/test_capture_*

# Generate test notebook
echo "📝 Generating test notebook..."
uv run python walkthroughgen_py.py simple_log_test.yaml -o "$GENERATED_NOTEBOOK"

# Run in sim. Its output is intentionally discarded to keep this report short,
# so say explicitly when it fails instead of exiting without a message.
echo "🚀 Running test in sim..."
if ! ./test_notebook_colab_sim.sh "$GENERATED_NOTEBOOK" > /dev/null 2>&1; then
    echo "❌ FAIL: notebook simulation failed; re-run ./test_notebook_colab_sim.sh $GENERATED_NOTEBOOK to see its output" >&2
    exit 1
fi

# Find the executed notebook in the most recent timestamped directory
NOTEBOOK_DIR=$(ls -1dt tmp/test_* 2>/dev/null | head -1)
NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb"

# `ls | head` succeeds even when nothing matches, so verify the result before
# handing a bogus path to the analysis scripts.
if [ -z "$NOTEBOOK_DIR" ] || [ ! -f "$NOTEBOOK_PATH" ]; then
    echo "❌ FAIL: executed notebook not found under tmp/test_*" >&2
    exit 1
fi

echo "📋 Analyzing results from $NOTEBOOK_PATH..."

# First dump debug info
echo "🔍 Dumping debug info..."
python3 inspect_notebook.py "$NOTEBOOK_PATH" "run_with_baml_logs"

echo ""
echo "📊 Running log capture analysis..."

# Check for BAML log patterns in the executed notebook; its exit status
# becomes this script's exit status (cleanup runs from the EXIT trap).
python3 analyze_log_capture.py "$NOTEBOOK_PATH"

================================================
FILE: workshops/2025-07-16/hack/testing.md
================================================
# Jupyter Notebook Testing Framework

This document describes the general testing framework for validating any functionality in Jupyter notebooks, with a specific example of testing BAML log capture.

## General Framework

### Overview

The testing framework provides a complete iteration loop for testing notebook implementations:

1. **Generate** test notebooks with specific functionality 
2. **Execute** notebooks in a simulated Google Colab environment  
3. **Analyze** executed notebooks for expected outputs and behaviors
4. **Report** clear pass/fail results

### Core Components

#### Notebook Simulator (`test_notebook_colab_sim.sh`)

The simulation script creates a realistic Google Colab environment for any notebook:

**Environment Setup:**
- Creates timestamped test directory: `./tmp/test_YYYYMMDD_HHMMSS/`
- Sets up fresh Python virtual environment
- Installs Jupyter dependencies (`notebook`, `nbconvert`, `ipykernel`)

**Notebook Execution:**
- Copies test notebook to clean environment
- Uses `ExecutePreprocessor` to run all cells (simulates Colab execution)
- **Critical:** Activates virtual environment before execution
- **Critical:** Saves executed notebook with cell outputs back to disk

**Usage:**
```bash
./test_notebook_colab_sim.sh your_notebook.ipynb
```

The simulator will:
- Execute all cells in the notebook
- Preserve the test directory for inspection
- Show final directory structure
- Report success/failure

#### Output Inspector (`inspect_notebook.py`)

Debug utility for examining notebook cell outputs in detail:

**Features:**
- Shows cell source code and execution counts  
- Displays the type of every output (e.g. stream, execute_result, display_data, error)
- Highlights patterns in output text
- Shows execution errors with tracebacks
- Filters cells by keywords for focused debugging

**Usage:**
```bash
# Inspect all cells
python3 inspect_notebook.py path/to/notebook.ipynb

# Filter for specific content
python3 inspect_notebook.py path/to/notebook.ipynb "keyword"

# Look for errors
python3 inspect_notebook.py path/to/notebook.ipynb "error"
```

**Sample Output:**
```
🔍 CELL 0 (code)
📝 SOURCE:
import sys
print("Hello!")
print("Error!", file=sys.stderr)

📤 OUTPUTS (2 outputs):
  Output 0: type=stream
    Text length: 7 chars
    > Hello!...
  Output 1: type=stream  
    Text length: 7 chars
    > Error!...
    🎯 Found patterns: ['Error']
```

### Key Insights for Notebook Testing

#### Execution Environment
1. **Virtual environment activation is critical** - Without it, execution fails silently
2. **Output persistence must be explicit** - `ExecutePreprocessor` only modifies the notebook in memory, so the executed notebook must be written back to disk to keep its outputs
3. **Check execution counts** - `execution_count=None` means cell never executed
4. **Handle different output types** - stream, execute_result, error, display_data

#### Common Debugging Steps
1. **Verify basic execution:**
   ```bash
   python3 -c "
   import json
   nb = json.load(open('path/to/notebook.ipynb'))
   print('Execution counts:', [cell.get('execution_count') for cell in nb['cells'] if cell['cell_type']=='code'])
   "
   ```

2. **Check for execution errors:**
   ```bash
   python3 inspect_notebook.py path/to/notebook.ipynb "error"
   ```

3. **Look for specific output patterns:**
   ```bash
   python3 inspect_notebook.py path/to/notebook.ipynb "your_pattern"
   ```

### Creating Custom Tests

#### 1. Minimal Test Template

Create a simple notebook that tests basic functionality:

```json
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": [
        "# Test basic execution\n",
        "print('Hello from notebook!')\n",
        "\n",
        "# Test file creation\n",
        "with open('test.txt', 'w') as f:\n",
        "    f.write('Test successful\\n')\n",
        "\n",
        "# Test error handling\n",
        "try:\n",
        "    result = your_function_to_test()\n",
        "    print(f'Result: {result}')\n",
        "except Exception as e:\n",
        "    print(f'Error: {e}')"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python", 
      "name": "python3"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}
```

#### 2. Test Script Template

```bash
#!/bin/bash
set -e

echo "🧪 Testing [Your Feature]..."

# Clean up any previous test
rm -f test_notebook.ipynb

# Generate or copy your test notebook
cp your_test_notebook.ipynb test_notebook.ipynb

# Run in simulator
echo "🚀 Running test in sim..."
./test_notebook_colab_sim.sh test_notebook.ipynb

# Find the executed notebook
NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1)
NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb"

# Analyze results
echo "📋 Analyzing results..."
python3 inspect_notebook.py "$NOTEBOOK_PATH" "your_search_term"

# Add your custom analysis
python3 -c "
import json
with open('$NOTEBOOK_PATH') as f:
    nb = json.load(f)

# Your custom analysis logic here
success = check_for_expected_outputs(nb)

if success:
    print('✅ PASS: Test succeeded!')
else:
    print('❌ FAIL: Test failed!')
    exit(1)
"

echo "🧹 Cleaning up..."
rm -f test_notebook.ipynb
```

---

## Use Case: BAML Log Capture Testing

This section demonstrates how to use the general framework for a specific use case: testing BAML log capture in notebooks.

### Problem Statement

BAML (a language model framework) uses FFI bindings to a Rust binary and outputs logs to stderr. We need to test whether different log capture methods can successfully capture these logs in Jupyter notebook cells.

### Test Implementation

#### Test Configuration (`simple_log_test.yaml`)

```yaml
title: "BAML Log Capture Test"
text: "Simple test for log capture"

sections:
  - title: "Log Capture Test"
    steps:
      - baml_setup: true
      - fetch_file:
          src: "walkthrough/01-agent.baml"
          dest: "baml_src/agent.baml"
      - file:
          src: "./simple_main.py"
      - text: "Testing log capture with show_logs=true:"
      - run_main:
          args: "What is 2+2?"
          show_logs: true
```

#### Test Function (`simple_main.py`)

```python
def main(message="What is 2+2?"):
    """Simple main function that calls BAML directly"""
    client = get_baml_client()
    
    # Call the BAML function - this should generate logs
    result = client.DetermineNextStep(f"User asked: {message}")
    
    print(f"Input: {message}")
    print(f"Result: {result}")
    return result
```

#### Log Capture Implementation

The current working implementation in `walkthroughgen_py.py`:

```python
def run_with_baml_logs(func, *args, **kwargs):
    """Test log capture using IPython capture_output"""
    # Ensure BAML_LOG is set
    if 'BAML_LOG' not in os.environ:
        os.environ['BAML_LOG'] = 'info'
    
    print(f"[LOG CAPTURE TEST] Running with BAML_LOG={os.environ.get('BAML_LOG')}...")
    
    # Capture both stdout and stderr
    with capture_output() as captured:
        result = func(*args, **kwargs)
    
    # Display captured outputs
    if captured.stdout:
        print("=== Captured Stdout ===")
        print(captured.stdout)
    
    if captured.stderr:
        print("=== Captured BAML Logs ===")
        print(captured.stderr)
    else:
        print("=== No BAML Logs Captured ===")
    
    print("=== Function Result ===")
    print(result)
    
    return result
```

### Test Execution

#### Main Test Script (`test_log_capture.sh`)

```bash
#!/bin/bash
set -e

echo "🧪 Testing BAML Log Capture..."

# Generate test notebook from YAML config
echo "📝 Generating test notebook..."
uv run python walkthroughgen_py.py simple_log_test.yaml -o test_capture.ipynb

# Run in simulator  
echo "🚀 Running test in sim..."
./test_notebook_colab_sim.sh test_capture.ipynb

# Find the executed notebook
NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1)
NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb"

echo "📋 Analyzing results from $NOTEBOOK_PATH..."

# Debug output
echo "🔍 Dumping debug info..."
python3 inspect_notebook.py "$NOTEBOOK_PATH" "run_with_baml_logs"

# Analyze for BAML log patterns
echo "📊 Running log capture analysis..."
python3 analyze_log_capture.py "$NOTEBOOK_PATH"

echo "🧹 Cleaning up..."
rm -f test_capture.ipynb
```

#### Analysis Script (`analyze_log_capture.py`)

```python
#!/usr/bin/env python3
import json
import sys
import os

def check_logs(notebook_path):
    """Check if BAML logs were captured in the notebook"""
    
    with open(notebook_path) as f:
        nb = json.load(f)
    
    found_log_pattern = False
    found_capture_test = False
    
    for i, cell in enumerate(nb['cells']):
        if cell['cell_type'] == 'code' and 'outputs' in cell:
            source = ''.join(cell.get('source', []))
            if 'run_with_baml_logs' in source:
                found_capture_test = True
                print(f'Found log capture test in cell {i}')
                
                # Check outputs for BAML logs
                for output in cell['outputs']:
                    if output.get('output_type') == 'stream' and 'text' in output:
                        text = ''.join(output['text'])
                        # Look for the specific BAML log pattern
                        if '---Parsed Response (class DoneForNow)---' in text:
                            found_log_pattern = True
                            print(f'✅ FOUND BAML LOG PATTERN in cell {i} output!')
    
    return found_capture_test, found_log_pattern

# Run analysis and return pass/fail
capture_test_found, log_pattern_found = check_logs(sys.argv[1])

if not capture_test_found:
    print('❌ FAIL: No log capture test found in notebook')
    sys.exit(1)

if log_pattern_found:
    print('✅ PASS: BAML logs successfully captured in notebook output!')
    sys.exit(0)
else:
    print('❌ FAIL: BAML log pattern not found in captured output')
    sys.exit(1)
```

### Expected Output Flow

#### Successful Test Run:
```bash
$ ./test_log_capture.sh

🧪 Testing BAML Log Capture...
📝 Generating test notebook...
Generated notebook: test_capture.ipynb
🚀 Running test in sim...
🧪 Creating clean test environment in: ./tmp/test_20250716_191106
📁 Test directory will be preserved for inspection
🐍 Creating fresh Python virtual environment...
📦 Installing Jupyter dependencies...
🏃 Running notebook in clean environment...
✅ Notebook executed successfully!
💾 Executed notebook saved with outputs

📋 Analyzing results from tmp/test_20250716_191106/test_notebook.ipynb...
🔍 Dumping debug info...
Found log capture test in cell 11

📤 OUTPUTS (3 outputs):
  Output 0: type=stream
    Text length: 49 chars
    > [LOG CAPTURE TEST] Running with BAML_LOG=info......
  Output 1: type=stream
    Text length: 1272 chars
    > 2025-07-16T19:11:22.445 [BAML [92mINFO[0m] [35mFunction DetermineNextStep[0m...
    🎯 Found patterns: ['BAML', 'Parsed', 'Response']

📊 Running log capture analysis...
Found log capture test in cell 11
✅ FOUND BAML LOG PATTERN in cell 11 output!
✅ PASS: BAML logs successfully captured in notebook output!
🧹 Cleaning up...
```

### Key BAML-Specific Insights

1. **BAML logs go to stderr** - Due to FFI bindings to Rust binary
2. **Requires `BAML_LOG=info`** - Environment variable controls verbosity  
3. **Logs include ANSI color codes** - Need to handle terminal formatting
4. **Pattern matching** - Look for `---Parsed Response (class DoneForNow)---` to confirm successful execution
5. **IPython capture_output() works** - Successfully captures stderr in notebook context

### Iteration Loop Benefits

This framework enables rapid testing of different log capture approaches:

1. **Modify** the `run_with_baml_logs` function in `walkthroughgen_py.py`
2. **Run** `./test_log_capture.sh`  
3. **Get** immediate pass/fail feedback
4. **Debug** with `inspect_notebook.py` if needed
5. **Repeat** until a working implementation is found

This same pattern can be applied to test any notebook functionality: library integrations, environment setup, output formatting, error handling, etc.

================================================
FILE: workshops/2025-07-16/pyproject.toml
================================================
[project]
name = "workshops"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "baml>=0.19.1",
    "jupyter>=1.1.1",
    "nbformat>=5.10.4",
    "pyyaml>=6.0.2",
]


================================================
FILE: workshops/2025-07-16/test_notebook_colab_sim.sh
================================================
#!/bin/bash
# Simulate Google Colab environment for testing notebooks
#
# Usage: test_notebook_colab_sim.sh <notebook_path>
#
# Setup phase: creates ./tmp/test_<timestamp>/ under the current directory,
# builds a fresh virtualenv inside it, installs the Jupyter tooling, and
# copies the given notebook there as test_notebook.ipynb.

# Abort on the first failing command.
set -e

NOTEBOOK_PATH="$1"

if [ -z "$NOTEBOOK_PATH" ]; then
    echo "Usage: $0 <notebook_path>"
    exit 1
fi

# Get absolute path of notebook
# (resolved before the `cd` below so a relative argument stays valid)
NOTEBOOK_PATH=$(realpath "$NOTEBOOK_PATH")

# Create test directory in current folder
# The name has one-second resolution (date +%Y%m%d_%H%M%S).
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
TEMP_DIR="./tmp/test_${TIMESTAMP}"
mkdir -p "$TEMP_DIR"
echo "🧪 Creating clean test environment in: $TEMP_DIR"

# Don't auto-cleanup so we can inspect it
echo "📁 Test directory will be preserved for inspection"

# Change to temp directory
# Everything after this point uses paths relative to the test directory.
cd "$TEMP_DIR"

# Create fresh Python virtual environment
echo "🐍 Creating fresh Python virtual environment..."
python3 -m venv venv
source venv/bin/activate

# Install jupyter dependencies
# (installed into the venv activated above)
echo "📦 Installing Jupyter dependencies..."
pip install --quiet notebook nbconvert ipykernel

# Copy notebook to temp directory
# The copy is always named test_notebook.ipynb regardless of the source name.
cp "$NOTEBOOK_PATH" test_notebook.ipynb

# Create a Python script to execute the notebook
# (quoted 'EOF' delimiter: the shell performs no expansion inside the heredoc)
cat > run_notebook.py << 'EOF'
"""Execute test_notebook.ipynb in place and report the outcome.

Runs every cell with nbconvert's ExecutePreprocessor, writes the notebook
(with whatever outputs were produced) back to disk, and exits with status 1
if execution raised.
"""
import nbformat
from nbconvert.preprocessors import ExecutePreprocessor
import sys
import os

NOTEBOOK_FILE = 'test_notebook.ipynb'

# The virtualenv lives next to the notebook; walking into it would bury the
# interesting files under thousands of site-packages entries.
SKIPPED_DIRS = {'venv'}


def save_notebook(nb):
    """Write the (possibly partially) executed notebook back to disk."""
    with open(NOTEBOOK_FILE, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)


def print_directory_tree(top='.'):
    """Print an indented listing of `top`, skipping SKIPPED_DIRS and dotfiles."""
    for root, dirs, files in os.walk(top):
        # Prune in place so os.walk does not descend into skipped directories.
        dirs[:] = [d for d in dirs if d not in SKIPPED_DIRS]
        level = root.count(os.sep)
        indent = ' ' * 2 * level
        print(f"{indent}{os.path.basename(root)}/")
        subindent = ' ' * 2 * (level + 1)
        for file in files[:10]:  # Limit output
            if not file.startswith('.'):
                print(f"{subindent}{file}")


# Ensure OPENAI_API_KEY is passed through
if 'OPENAI_API_KEY' in os.environ:
    print("✅ OPENAI_API_KEY is set")
else:
    print("⚠️  Warning: OPENAI_API_KEY not set")

# Read notebook
with open(NOTEBOOK_FILE, 'r', encoding='utf-8') as f:
    nb = nbformat.read(f, as_version=4)

# Execute ALL cells (just like Colab)
ep = ExecutePreprocessor(timeout=120, kernel_name='python3')

print("🚀 Executing notebook (this simulates Google Colab)...")
print("=" * 60)

try:
    ep.preprocess(nb, {'metadata': {'path': '.'}})
except Exception as e:
    print(f"\n❌ Error executing notebook: {e}")
    if hasattr(e, 'traceback'):
        print("\nTraceback:")
        print(e.traceback)
    # The notebook object is updated in place as cells run, so it holds the
    # outputs produced up to the failure. Persist it so the error can be
    # examined afterwards (e.g. with inspect_notebook.py).
    save_notebook(nb)
    print("💾 Partially executed notebook saved with outputs")
    sys.exit(1)

print("\n✅ Notebook executed successfully!")

# Save the executed notebook back to disk
save_notebook(nb)
print("💾 Executed notebook saved with outputs")

# Show final directory structure
print("\n📁 Final directory structure:")
print_directory_tree('.')
EOF

# Run the notebook
# The venv was already activated during setup; re-sourcing it here is harmless.
# Because of `set -e`, a non-zero exit from run_notebook.py stops the script
# before the report below is printed.
echo "🏃 Running notebook in clean environment..."
source venv/bin/activate && python run_notebook.py

# Check what BAML files were created
# Informational only: lists baml_src/ if the notebook left one behind.
echo -e "\n📄 BAML files created:"
if [ -d "baml_src" ]; then
    ls -la baml_src/
else
    echo "No baml_src directory found"
fi

# Check if Python BAML client was generated
echo -e "\n🐍 Python BAML client:"
if [ -d "baml_client" ]; then
    # Check if it's Python or TypeScript
    # NOTE(review): the else branch only knows that __init__.py is missing; it
    # does not verify that any .ts files actually exist.
    if [ -f "baml_client/__init__.py" ]; then
        echo "✅ Python client generated"
        ls baml_client/*.py 2>/dev/null | head -5
    else
        echo "❌ TypeScript client generated (not Python)"
        ls baml_client/*.ts 2>/dev/null | head -5
    fi
else
    echo "No baml_client directory found"
fi

echo -e "\n✨ Test complete!"

================================================
FILE: workshops/2025-07-16/walkthrough/00-.gitignore
================================================
baml_client/
node_modules/


================================================
FILE: workshops/2025-07-16/walkthrough/00-main.py
================================================
def hello():
    """Print the fixed greeting line to stdout."""
    greeting = 'hello, world!'
    print(greeting)


def main():
    """Entry point for this walkthrough step: emit the greeting once."""
    hello()

================================================
FILE: workshops/2025-07-16/walkthrough/00-package.json
================================================
{
    "name": "my-agent",
    "version": "0.1.0",
    "private": true,
    "scripts": {
      "dev": "tsx src/index.ts",
      "build": "tsc"
    },
    "dependencies": {
      "tsx": "^4.15.0",
      "typescript": "^5.0.0"
    },
    "devDependencies": {
      "@types/node": "^20.0.0",
      "@typescript-eslint/eslint-plugin": "^6.0.0",
      "@typescript-eslint/parser": "^6.0.0",
      "eslint": "^8.0.0"
    }
  }
  

================================================
FILE: workshops/2025-07-16/walkthrough/00-tsconfig.json
================================================
{
    "compilerOptions": {
      "target": "ES2017",
      "lib": ["esnext"],
      "allowJs": true,
      "skipLibCheck": true,
      "strict": true,
      "noEmit": true,
      "esModuleInterop": true,
      "module": "esnext",
      "moduleResolution": "bundler",
      "resolveJsonModule": true,
      "isolatedModules": true,
      "jsx": "preserve",
      "incremental": true,
      "plugins": [],
      "paths": {
        "@/*": ["./*"]
      }
    },
    "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
    "exclude": ["node_modules", "walkthrough"]
  }
  

================================================
FILE: workshops/2025-07-16/walkthrough/01-agent.baml
================================================
// Structured output returned by DetermineNextStep in this file.
class DoneForNow {
  // Literal tag; the only value this field can take is "done_for_now".
  intent "done_for_now"
  // Free-form text produced by the model.
  message string 
}

// Ask the model what to do next for a thread.
//   thread: string interpolated as-is into the user message via {{ thread }}.
//   returns: DoneForNow, the only output type declared at this step.
function DetermineNextStep(
    thread: string 
) -> DoneForNow {
    client "openai/gpt-4o"

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    // NOTE(review): no "/nothink" directive appears in the prompt below, so the
    // comment above looks stale — confirm and remove.
    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Smoke test: runs DetermineNextStep on a single user_input event ("hello!").
// No assertions are declared, so it only exercises the call.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-07-16/walkthrough/01-agent.py
================================================
import json
from typing import Dict, Any, List

# tool call or a respond to human tool
AgentResponse = Any  # This will be the return type from b.DetermineNextStep

class Event:
    """One entry in a thread: a string type tag plus an arbitrary payload."""

    def __init__(self, type: str, data: Any):
        # `type` shadows the builtin only inside this constructor; the
        # parameter name is part of the public signature and is kept as-is.
        self.type, self.data = type, data

class Thread:
    """Conversation state for the agent: an ordered list of event dicts."""

    def __init__(self, events: List[Dict[str, Any]]):
        # The list is stored by reference, not copied.
        self.events = events

    def serialize_for_llm(self):
        """Return the events encoded as a JSON string for the prompt."""
        # can change this to whatever custom serialization you want to do, XML, etc
        # e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        serialized = json.dumps(self.events)
        return serialized

def agent_loop(thread: Thread) -> AgentResponse:
    """Run a single LLM turn over `thread` and return the model's next step.

    right now this just runs one turn with the LLM, but
    we'll update this function to handle all the agent logic
    """
    client = get_baml_client()  # This will be defined by the BAML setup
    serialized_thread = thread.serialize_for_llm()
    return client.DetermineNextStep(serialized_thread)

================================================
FILE: workshops/2025-07-16/walkthrough/01-main.py
================================================
def main(message="hello from the notebook!"):
    """Wrap `message` in a one-event thread, run the agent loop, print the result."""
    # The user's message becomes the initial (and only) event of a new thread.
    initial_event = {"type": "user_input", "data": message}
    thread = Thread([initial_event])

    # Run the agent loop with the thread and show whatever it returns.
    print(agent_loop(thread))

================================================
FILE: workshops/2025-07-16/walkthrough/02-agent.baml
================================================
// One of the structured outputs DetermineNextStep can return in this file.
class DoneForNow {
  // Literal tag; the only value this field can take is "done_for_now".
  intent "done_for_now"
  // Free-form text produced by the model.
  message string 
}

// Ask the model what to do next for a thread.
//   thread: string interpolated as-is into the user message via {{ thread }}.
//   returns: either a CalculatorTools value (declared outside this file) or a
//            DoneForNow.
function DetermineNextStep(
    thread: string 
) -> CalculatorTools | DoneForNow {
    client "openai/gpt-4o"

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    // NOTE(review): no "/nothink" directive appears in the prompt below, so the
    // comment above looks stale — confirm and remove.
    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Smoke test: runs DetermineNextStep on a single user_input event ("hello!").
// No assertions are declared, so it only exercises the call.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

================================================
FILE: workshops/2025-07-16/walkthrough/02-main.py
================================================
def main(message="hello from the notebook!"):
    """Send one user message to the model and print the raw next step.

    Nothing is executed on the model's behalf here; the response is printed
    as-is so the tool call it contains is visible.
    """
    # Create a new thread with the user's message
    user_event = {"type": "user_input", "data": message}
    thread = Thread([user_event])

    # Get BAML client
    client = get_baml_client()

    # Ask for the next step and print the raw response to show the tool call
    print(client.DetermineNextStep(thread.serialize_for_llm()))

================================================
FILE: workshops/2025-07-16/walkthrough/02-tool_calculator.baml
================================================
// Union of the four calculator tool-call shapes declared below.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool


// Each tool class carries a literal `intent` tag and two numeric operands,
// `a` and `b`, each of which may be an int or a float.

// Tagged "add".
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tagged "subtract".
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tagged "multiply".
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tagged "divide".
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}


================================================
FILE: workshops/2025-07-16/walkthrough/03-agent.py
================================================
import json
from typing import Dict, Any, List

class Thread:
    """Mutable event log for one agent conversation."""

    def __init__(self, events: List[Dict[str, Any]]):
        # Kept by reference: appends made through `thread.events` are visible
        # to whoever supplied the list.
        self.events = events

    def serialize_for_llm(self):
        """Render the event log as the JSON text handed to the model."""
        # can change this to whatever custom serialization you want to do, XML, etc
        # e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        event_log = self.events
        return json.dumps(event_log)


def agent_loop(thread: Thread) -> str:
    """Query the model repeatedly until it answers with "done_for_now".

    Only the "add" tool is handled at this step: the call and its result are
    appended to `thread.events` and the loop continues. Any other intent
    raises ValueError.

    Returns:
        The `message` of the final "done_for_now" step.
    """
    client = get_baml_client()

    while True:
        next_step = client.DetermineNextStep(thread.serialize_for_llm())
        print("nextStep", next_step)

        if next_step.intent == "done_for_now":
            # response to human, return the next step object
            return next_step.message

        if next_step.intent != "add":
            raise ValueError(f"Unknown intent: {next_step.intent}")

        # Record the tool call before executing it.
        thread.events.append({"type": "tool_call", "data": next_step.__dict__})
        total = next_step.a + next_step.b
        print("tool_response", total)
        thread.events.append({"type": "tool_response", "data": total})

================================================
FILE: workshops/2025-07-16/walkthrough/03-main.py
================================================
def main(message="hello from the notebook!"):
    """Run the full agent loop on `message` and print the final reply."""
    # Create a new thread with the user's message
    initial_events = [{"type": "user_input", "data": message}]

    # Run the agent loop with full tool handling
    final_reply = agent_loop(Thread(initial_events))

    # Print the final response
    print(f"\nFinal response: {final_reply}")

================================================
FILE: workshops/2025-07-16/walkthrough/03b-agent.py
================================================
import json
from typing import Dict, Any, List, Union

class Thread:
    """Holds the list of events exchanged so far in one agent run."""

    def __init__(self, events: List[Dict[str, Any]]):
        self.events = events  # stored by reference, not copied

    def serialize_for_llm(self):
        """Serialize the events to a JSON string for the model prompt."""
        # can change this to whatever custom serialization you want to do, XML, etc
        # e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        as_json = json.dumps(self.events)
        return as_json

def handle_next_step(next_step, thread: Thread) -> Thread:
    """Execute a calculator tool call and record its result on the thread.

    Args:
        next_step: Tool-call object exposing `intent` (one of "add",
            "subtract", "multiply", "divide") and the operands `a` and `b`.
        thread: Thread whose `events` list receives the "tool_response" event.

    Returns:
        The same `thread` object, mutated in place.

    Raises:
        ValueError: If `next_step.intent` is not a supported calculator intent.
        ZeroDivisionError: If the intent is "divide" and `b` is zero.
    """
    # One table instead of four copy-pasted branches; every entry shares the
    # same bookkeeping below.
    operations = {
        "add": lambda a, b: a + b,
        "subtract": lambda a, b: a - b,
        "multiply": lambda a, b: a * b,
        "divide": lambda a, b: a / b,
    }

    operation = operations.get(next_step.intent)
    if operation is None:
        # Falling through used to return None despite the `-> Thread`
        # annotation; fail loudly instead.
        raise ValueError(f"Unknown intent: {next_step.intent}")

    result: float = operation(next_step.a, next_step.b)
    print("tool_response", result)
    thread.events.append({
        "type": "tool_response",
        "data": result
    })
    return thread

def agent_loop(thread: Thread) -> str:
    """Drive the agent until the model signals it is done.

    Each iteration asks the model for the next step and records it on the
    thread as a "tool_call" event. A "done_for_now" step ends the loop; a
    calculator step is executed through `handle_next_step` and the loop
    continues with the updated thread.

    Args:
        thread: Conversation state; its `events` list is appended to in place.

    Returns:
        The `message` of the final "done_for_now" step.

    Raises:
        ValueError: If the model returns an intent this loop does not handle.
    """
    b = get_baml_client()

    while True:
        next_step = b.DetermineNextStep(thread.serialize_for_llm())
        print("nextStep", next_step)

        thread.events.append({
            "type": "tool_call",
            "data": next_step.__dict__
        })

        if next_step.intent == "done_for_now":
            # response to human, return the next step object
            return next_step.message
        elif next_step.intent in ("add", "subtract", "multiply", "divide"):
            thread = handle_next_step(next_step, thread)
        else:
            # Without this branch an unrecognised intent would spin forever,
            # re-querying the model on every pass.
            raise ValueError(f"Unknown intent: {next_step.intent}")

================================================
FILE: workshops/2025-07-16/walkthrough/03b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/** One entry stored on a Thread: a string tag plus an untyped payload. */
export interface Event {
    // Tag for the entry; this file pushes "tool_call" and "tool_response".
    type: string
    // Payload; left as `any`, so its shape is not checked by the compiler.
    data: any;
}

/** Conversation state for the agent: an ordered list of events. */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        // Stored by reference, not copied.
        this.events = events;
    }

    /** Encode the event list as a JSON string for inclusion in the prompt. */
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}

/** Any of the four calculator tool calls the model can request. */
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute a calculator tool call and append its result to the thread as a
 * "tool_response" event.
 *
 * @param nextStep - Tool call carrying `intent` and the operands `a` and `b`.
 * @param thread - Thread whose `events` array is appended to in place.
 * @returns The same `thread` instance.
 * @throws Error if `nextStep.intent` is not a calculator intent at runtime.
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Not reachable according to the types, but the value comes from
            // a model response; fail loudly rather than resolve to undefined.
            throw new Error(`Unknown intent: ${(nextStep as { intent: string }).intent}`);
    }

    // Shared bookkeeping for every operation.
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Query the model repeatedly until it answers with "done_for_now".
 *
 * Every step is recorded on the thread as a "tool_call" event; calculator
 * steps are then executed through `handleNextStep`.
 *
 * @returns The `message` of the final "done_for_now" step.
 */
export async function agentLoop(thread: Thread): Promise<string> {
    for (;;) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({ "type": "tool_call", "data": nextStep });

        if (nextStep.intent === "done_for_now") {
            // response to human, return the next step object
            return nextStep.message;
        }

        if (
            nextStep.intent === "add" ||
            nextStep.intent === "subtract" ||
            nextStep.intent === "multiply" ||
            nextStep.intent === "divide"
        ) {
            thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-07-16/walkthrough/04-agent.baml
================================================
// Output types DetermineNextStep can return in this file. Each carries a
// literal `intent` tag that distinguishes it from the others.

// Tagged "done_for_now"; carries a free-form message string.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// The four calculator tools share the same shape: a literal tag plus two
// operands, `a` and `b`, each of which may be an int or a float.

// Tagged "add".
class AddTool {
    intent "add"
    a int | float
    b int | float
}

// Tagged "subtract".
class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

// Tagged "multiply".
class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

// Tagged "divide".
class DivideTool {
    intent "divide"
    a int | float
    b int | float
}

// Asks the model (openai/gpt-4o) to choose the next step for a conversation.
// `thread` is the already-serialized event history, inserted verbatim into
// the user message. The return type is a union of the step classes, and
// `ctx.output_format` is where BAML renders the schema instructions for it.
function DetermineNextStep(
    thread: string 
) -> DoneForNow | AddTool | SubtractTool | MultiplyTool | DivideTool {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// Smoke test with a plain greeting. It declares no assertions, so it only
// exercises the call.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
}

// Smoke test with a single multiplication request; also has no assertions.
test SimpleMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [{"type": "user_input", "data": "can you multiply 3 and 4"}]
    "#
  }
}

================================================
FILE: workshops/2025-07-16/walkthrough/04b-agent.baml
================================================
// Step tagged with the literal intent "done_for_now"; `message` carries the
// text that is returned to the user.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Calculator steps. Each class is tagged by a literal `intent` value and
// carries two numeric operands, `a` and `b` (either int or float).
class AddTool {
    intent "add"
    a int | float
    b int | float
}

class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

class DivideTool {
    intent "divide"
    a int | float
    b int | float
}

// Asks the model (openai/gpt-4o) to choose the next step for a conversation.
// `thread` is the already-serialized event history, inserted verbatim into
// the user message. The return type is a union of the step classes, and
// `ctx.output_format` is where BAML renders the schema instructions for it.
function DetermineNextStep(
    thread: string 
) -> DoneForNow | AddTool | SubtractTool | MultiplyTool | DivideTool {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// A plain greeting needs no tool: the step must be a done_for_now reply.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent_check, {{this.intent == "done_for_now"}})
}

// A single multiplication request must yield a multiply step whose operands
// are exactly 3 and 4, in that order.
test SimpleMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [{"type": "user_input", "data": "can you multiply 3 and 4"}]
    "#
  }
  @@assert(intent_check, {{this.intent == "multiply"}})
  @@assert(a_check, {{this.a == 3}})
  @@assert(b_check, {{this.b == 4}})
}

================================================
FILE: workshops/2025-07-16/walkthrough/04c-agent.baml
================================================
// Step tagged with the literal intent "done_for_now"; `message` carries the
// text that is returned to the user.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Calculator steps. Each class is tagged by a literal `intent` value and
// carries two numeric operands, `a` and `b` (either int or float).
class AddTool {
    intent "add"
    a int | float
    b int | float
}

class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

class DivideTool {
    intent "divide"
    a int | float
    b int | float
}

// Asks the model (openai/gpt-4o) to choose the next step for a conversation.
// `thread` is the already-serialized event history, inserted verbatim into
// the user message. The return type is a union of the step classes, and
// `ctx.output_format` is where BAML renders the schema instructions for it.
function DetermineNextStep(
    thread: string 
) -> DoneForNow | AddTool | SubtractTool | MultiplyTool | DivideTool {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// A plain greeting needs no tool: the step must be a done_for_now reply.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent_check, {{this.intent == "done_for_now"}})
}

// A single multiplication request must yield a multiply step whose operands
// are exactly 3 and 4, in that order.
test SimpleMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [{"type": "user_input", "data": "can you multiply 3 and 4"}]
    "#
  }
  @@assert(intent_check, {{this.intent == "multiply"}})
  @@assert(a_check, {{this.a == 3}})
  @@assert(b_check, {{this.b == 4}})
}

// Mid-conversation fixture: multiply and divide have already run (results 12
// and 6), so the remaining step is to add 12 to the previous result 6.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {"type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result"},
        {"type": "tool_call", "data": {"intent": "multiply", "a": 3, "b": 4}},
        {"type": "tool_response", "data": 12},
        {"type": "tool_call", "data": {"intent": "divide", "a": 12, "b": 2}},
        {"type": "tool_response", "data": 6}
      ]
    "#
  }
  @@assert(intent_check, {{this.intent == "add"}})
  @@assert(a_check, {{this.a == 6}})
  @@assert(b_check, {{this.b == 12}})
}

// All three operations have already run; the model must finish and its
// message must contain the final answer "18".
test CompleteConversation {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {"type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result"},
        {"type": "tool_call", "data": {"intent": "multiply", "a": 3, "b": 4}},
        {"type": "tool_response", "data": 12},
        {"type": "tool_call", "data": {"intent": "divide", "a": 12, "b": 2}},
        {"type": "tool_response", "data": 6},
        {"type": "tool_call", "data": {"intent": "add", "a": 6, "b": 12}},
        {"type": "tool_response", "data": 18}
      ]
    "#
  }
  @@assert(intent_check, {{this.intent == "done_for_now"}})
  @@assert(answer_check, {{"18" in this.message}})
}

================================================
FILE: workshops/2025-07-16/walkthrough/05-agent.baml
================================================
// Step tagged with the literal intent "done_for_now"; `message` carries the
// text that is returned to the user.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Calculator steps. Each class is tagged by a literal `intent` value and
// carries two numeric operands, `a` and `b` (either int or float).
class AddTool {
    intent "add"
    a int | float
    b int | float
}

class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

class DivideTool {
    intent "divide"
    a int | float
    b int | float
}

// Step for asking the user a follow-up question. The @description on
// `message` is included in the schema text shown to the model.
class ClarificationRequest {
    intent "request_more_information"
    message string @description("you can request more information from the user")
}

// Asks the model (openai/gpt-4o) to choose the next step for a conversation.
// `thread` is the already-serialized event history, inserted verbatim into
// the user message. The return union includes ClarificationRequest, so the
// model may ask the user a question instead of answering or calling a tool.
function DetermineNextStep(
    thread: string 
) -> DoneForNow | AddTool | SubtractTool | MultiplyTool | DivideTool | ClarificationRequest {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

================================================
FILE: workshops/2025-07-16/walkthrough/05-agent.py
================================================
# Agent implementation with clarification support
import json

def agent_loop(thread, clarification_handler, max_iterations=3):
    """Drive the agent for at most ``max_iterations`` model calls.

    Each pass serializes ``thread.events`` to JSON, asks the model for the
    next step and acts on its ``intent``:

    * ``done_for_now`` -- return the step's message.
    * ``request_more_information`` -- pass the question to
      ``clarification_handler`` and append the question and the answer to
      the thread.
    * ``add`` / ``subtract`` / ``multiply`` / ``divide`` -- compute the value
      and append a ``tool_call`` event describing the operation and result.

    A step without an ``intent`` attribute ends the loop with an error
    string; running out of iterations returns a message saying so.
    """
    # (intent, implementation) pairs; division by zero yields an error string.
    calculator_ops = (
        ("add", lambda a, b: a + b),
        ("subtract", lambda a, b: a - b),
        ("multiply", lambda a, b: a * b),
        ("divide", lambda a, b: "Error: Division by zero" if b == 0 else a / b),
    )
    missing = object()

    attempt = 0
    while attempt < max_iterations:
        attempt += 1
        print(f"🔄 Agent loop iteration {attempt}/{max_iterations}")

        baml_client = get_baml_client()
        thread_json = json.dumps(thread.events, indent=2)
        step = baml_client.DetermineNextStep(thread_json)

        intent = getattr(step, 'intent', missing)
        if intent is missing:
            return "Error: Unexpected result type"

        if intent == 'done_for_now':
            return step.message

        if intent == 'request_more_information':
            # Ask the human first, then record both sides of the exchange.
            answer = clarification_handler(step.message)
            thread.events.append({
                "type": "clarification_request",
                "data": step.message
            })
            thread.events.append({
                "type": "clarification_response",
                "data": answer
            })
            continue

        for name, compute in calculator_ops:
            if intent == name:
                result_value = compute(step.a, step.b)
                operation = f"{name}({step.a}, {step.b})"
                print(f"🔧 Calling tool: {operation} = {result_value}")
                thread.events.append({
                    "type": "tool_call",
                    "data": {
                        "tool": "calculator",
                        "operation": operation,
                        "result": result_value
                    }
                })
                break

    # Ran out of iterations without a done_for_now step.
    return f"Agent reached maximum iterations ({max_iterations}) without completing the task."

class Thread:
    """Minimal holder for the ordered list of conversation events."""

    def __init__(self, events):
        # The list is stored by reference, not copied.
        self.events = events

================================================
FILE: workshops/2025-07-16/walkthrough/05-main.py
================================================
def get_human_input(prompt):
    """Show ``prompt`` and return the human's reply.

    When ``IN_COLAB`` is truthy the reply is read interactively with
    ``input``; otherwise a fixed canned reply is printed and returned so the
    code can run unattended.
    """
    print(f"\n🤔 {prompt}")

    if not IN_COLAB:
        # Local testing: no interactive prompt, use a fixed answer.
        canned = "I meant to multiply 3 and 4"
        print(f"📝 [Auto-response for testing]: {canned}")
        return canned

    # Colab: ask the real user.
    return input("Your response: ")

def main(message="hello from the notebook!"):
    """Run the agent once on ``message`` and print its final response.

    Clarification questions raised by the agent are routed to
    ``get_human_input``.
    """
    # Seed the conversation with the user's message.
    thread = Thread([{"type": "user_input", "data": message}])

    print(f"🚀 Starting agent with message: '{message}'")

    result = agent_loop(
        thread,
        lambda question: get_human_input(f"The agent needs clarification: {question}"),
    )

    print(f"\n✅ Final response: {result}")

================================================
FILE: workshops/2025-07-16/walkthrough/05b-agent.baml
================================================
// human tools are async requests to a human
// Union alias of the two human-facing step classes declared below.
type HumanTools = ClarificationRequest | DoneForNow

// Step for asking the user a follow-up question. Note that the @description
// here is attached to the `intent` field, not to `message`.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step that ends the current run; `message` is described to the model as the
// text to send to the user about the work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Asks the model (openai/gpt-4o) to choose the next step for a conversation.
// `thread` is the already-serialized event history, inserted verbatim into
// the user message.
// NOTE(review): `CalculatorTools` is not declared in this file; presumably it
// comes from a sibling .baml file (e.g. tool_calculator.baml) — confirm.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// A plain greeting needs no tool: the step must be a done_for_now reply.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

// A single multiplication request must yield a multiply step (operands are
// not checked here).
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Full three-step history (multiply, divide, add) with all tool responses
// already present; the model must finish and mention the final answer "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// The second operand is gibberish, so the model is expected to ask the user
// for clarification instead of guessing.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the user answers the clarification question with "12", the model
// should resume the original request and multiply 3 by 12.
// The fixture is valid JSON (no trailing comma after the last event), and
// each assertion label names the field it actually checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-07-16/walkthrough/05c-agent.baml
================================================
// human tools are async requests to a human
// Union alias of the two human-facing step classes declared below.
type HumanTools = ClarificationRequest | DoneForNow

// Step for asking the user a follow-up question. Note that the @description
// here is attached to the `intent` field, not to `message`.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step that ends the current run; `message` is described to the model as the
// text to send to the user about the work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Asks the model (openai/gpt-4o) to choose the next step for a conversation.
// `thread` is the already-serialized event history, inserted verbatim into
// the user message.
// NOTE(review): `CalculatorTools` is not declared in this file; presumably it
// comes from a sibling .baml file (e.g. tool_calculator.baml) — confirm.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

// A bare greeting carries no task; this test expects the model to ask the
// user for more information rather than declare itself done.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single multiplication request must yield a multiply step (operands are
// not checked here).
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Full three-step history (multiply, divide, add) with all tool responses
// already present; the model must finish and mention the final answer "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call", 
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add", 
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// The second operand is gibberish, so the model is expected to ask the user
// for clarification instead of guessing.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the user answers the clarification question with "12", the model
// should resume the original request and multiply 3 by 12.
// The fixture is valid JSON (no trailing comma after the last event), and
// each assertion label names the field it actually checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-07-16/walkthrough/06-agent.baml
================================================
// Step tagged with the literal intent "done_for_now"; `message` carries the
// text that is returned to the user.
class DoneForNow {
  intent "done_for_now"
  message string 
}

// Calculator steps. Each class is tagged by a literal `intent` value and
// carries two numeric operands, `a` and `b` (either int or float).
class AddTool {
    intent "add"
    a int | float
    b int | float
}

class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

class DivideTool {
    intent "divide"
    a int | float
    b int | float
}

// Step for asking the user a follow-up question. The @description on
// `message` is included in the schema text shown to the model.
class ClarificationRequest {
    intent "request_more_information"
    message string @description("you can request more information from the user")
}

// Asks the model (openai/gpt-4o) to choose the next step for a conversation.
// Before the final question, the prompt walks the model through four
// numbered questions and a <reasoning> section asking it to think step by
// step. All of that is literal prompt text inside the raw string.
function DetermineNextStep(
    thread: string 
) -> DoneForNow | AddTool | SubtractTool | MultiplyTool | DivideTool | ClarificationRequest {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        Before deciding on the next step, think through the situation:
        1. What has been asked?
        2. What information do I have?
        3. What tools are available to me?
        4. What is the most logical next step?

        <reasoning>
        Think step by step about what needs to be done next.
        </reasoning>

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

================================================
FILE: workshops/2025-07-16/walkthrough/07-agent.py
================================================
# Agent with configurable serialization formats
import json

class Thread:
    """Conversation thread that can render its events as JSON or as an
    XML-style transcript.

    ``events`` is a list of ``{"type": ..., "data": ...}`` dicts kept in
    conversation order; the list is stored by reference.
    """
    def __init__(self, events):
        self.events = events

    def serialize_as_json(self):
        """Serialize thread events to pretty-printed (2-space indented) JSON."""
        return json.dumps(self.events, indent=2)

    def serialize_as_xml(self):
        """Serialize thread events to an XML-style transcript.

        Every event becomes one element inside ``<thread>``:

        * ``tool_call`` events are rendered as a YAML dump of their data,
          wrapped in a tag named after the data's ``"tool"`` key. If that key
          is absent the ``"intent"`` key is used, and failing that the event
          type, so such events no longer raise ``KeyError``.
        * every other event is rendered inline as ``<type>data</type>``;
          event types this method does not know about are kept rather than
          silently dropped from the transcript.

        Event text is inserted verbatim (no XML escaping). PyYAML is imported
        only when a ``tool_call`` event is actually present.
        """
        xml_parts = ["<thread>"]

        for event in self.events:
            event_type = event['type']
            event_data = event['data']

            if event_type == 'tool_call':
                # Deferred import: threads without tool calls do not need PyYAML.
                import yaml

                tag = event_type
                if isinstance(event_data, dict):
                    tag = event_data.get("tool") or event_data.get("intent") or event_type

                # Use YAML for tool call args - more compact than nested XML
                yaml_content = yaml.dump(event_data, default_flow_style=False).strip()
                xml_parts.append(f'  <{tag}>')
                xml_parts.append('    ' + '\n    '.join(yaml_content.split('\n')))
                xml_parts.append(f'  </{tag}>')
            else:
                xml_parts.append(f'  <{event_type}>{event_data}</{event_type}>')

        xml_parts.append("</thread>")
        return "\n".join(xml_parts)

def agent_loop(thread, clarification_handler, use_xml=True):
    """Drive the agent until it produces a final answer.

    On every pass the thread is serialized (``serialize_as_xml`` when
    ``use_xml`` is truthy, otherwise ``serialize_as_json``), the model is
    asked for the next step, and the step's ``intent`` decides what happens:

    * ``done_for_now`` -- return the step's message.
    * ``request_more_information`` -- pass the question to
      ``clarification_handler`` and append the question and the answer to
      the thread.
    * ``add`` / ``subtract`` / ``multiply`` / ``divide`` -- compute the value
      and append a ``tool_call`` event describing the operation and result.

    A step without an ``intent`` attribute ends the loop with an error
    string. There is no iteration cap in this function.
    """
    # (intent, implementation) pairs; division by zero yields an error string.
    calculator_ops = (
        ("add", lambda a, b: a + b),
        ("subtract", lambda a, b: a - b),
        ("multiply", lambda a, b: a * b),
        ("divide", lambda a, b: "Error: Division by zero" if b == 0 else a / b),
    )
    missing = object()

    while True:
        baml_client = get_baml_client()

        # Pick the wire format for this pass and report its size.
        if use_xml:
            label, thread_str = "XML", thread.serialize_as_xml()
        else:
            label, thread_str = "JSON", thread.serialize_as_json()
        print(f"📄 Using {label} serialization ({len(thread_str)} chars)")

        step = baml_client.DetermineNextStep(thread_str)

        intent = getattr(step, 'intent', missing)
        if intent is missing:
            return "Error: Unexpected result type"

        if intent == 'done_for_now':
            return step.message

        if intent == 'request_more_information':
            # Ask the human first, then record both sides of the exchange.
            answer = clarification_handler(step.message)
            thread.events.append({
                "type": "clarification_request",
                "data": step.message
            })
            thread.events.append({
                "type": "clarification_response",
                "data": answer
            })
            continue

        for name, compute in calculator_ops:
            if intent == name:
                result_value = compute(step.a, step.b)
                operation = f"{name}({step.a}, {step.b})"
                print(f"🔧 Calling tool: {operation} = {result_value}")
                thread.events.append({
                    "type": "tool_call",
                    "data": {
                        "tool": "calculator",
                        "operation": operation,
                        "result": result_value
                    }
                })
                break

================================================
FILE: workshops/2025-07-16/walkthrough/07-main.py
================================================
def main(message="hello from the notebook!", use_xml=True):
    """Run the agent once on ``message`` and print its final response.

    ``use_xml`` is forwarded to ``agent_loop`` to select the thread
    serialization format. Clarification questions raised by the agent are
    routed to ``get_human_input``.
    """
    # Seed the conversation with the user's message.
    thread = Thread([{"type": "user_input", "data": message}])

    format_name = 'XML' if use_xml else 'JSON'
    print(f"🚀 Starting agent with message: '{message}'")
    print(f"📋 Using {format_name} format for thread serialization")

    result = agent_loop(
        thread,
        lambda question: get_human_input(f"The agent needs clarification: {question}"),
        use_xml=use_xml,
    )

    print(f"\n✅ Final response: {result}")

================================================
FILE: workshops/2025-07-16/walkthrough/07b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/** One entry in a thread's history: a type tag plus an arbitrary payload. */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered list of conversation events plus helpers that render it as the
 * tag-delimited text passed to the model.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with `serializeOneEvent`, joined by newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Removes leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as `<tag>` / body / `</tag>` on separate lines.
     *
     * The tag is the payload's `intent` when it has one, otherwise the event
     * type. Object payloads are rendered as `key: value` lines (skipping
     * `intent`); anything else is interpolated as-is.
     *
     * A `null` payload is treated as a plain value: `typeof null` is
     * `'object'`, so without the explicit check `Object.keys(null)` would
     * throw a TypeError.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;

        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }
}

// Union of the four arithmetic step types imported from the generated BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator step and records the numeric result on the thread.
 *
 * The result is logged, pushed onto `thread.events` as a `tool_response`
 * event, and the same thread object is returned. Division uses the plain `/`
 * operator with no zero check.
 *
 * @param nextStep calculator step chosen by the model
 * @param thread thread to append the result to (mutated in place)
 * @returns the thread that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // Shared tail of every case: log, append the tool_response, hand the thread back.
    const respondWith = (value: number): Thread => {
        console.log("tool_response", value);
        thread.events.push({ "type": "tool_response", "data": value });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return respondWith(nextStep.a + nextStep.b);
        case "subtract":
            return respondWith(nextStep.a - nextStep.b);
        case "multiply":
            return respondWith(nextStep.a * nextStep.b);
        case "divide":
            return respondWith(nextStep.a / nextStep.b);
    }
}

/**
 * Drives the agent until the model produces a step addressed to a human.
 *
 * Each pass asks the model for the next step given the serialized thread and
 * records it on the thread as a `tool_call` event. `done_for_now` and
 * `request_more_information` end the loop and return the thread; calculator
 * steps are executed and the loop continues.
 *
 * @param thread conversation thread; events are pushed onto `thread.events`
 * @returns the thread, whose last event is the step that ended the loop
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        // Every step, including the final one, is recorded before it is acted on.
        thread.events.push({ "type": "tool_call", "data": step });

        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            // response to human, return the thread
            return thread;
        }

        if (
            step.intent === "add" ||
            step.intent === "subtract" ||
            step.intent === "multiply" ||
            step.intent === "divide"
        ) {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-07-16/walkthrough/07c-agent.baml
================================================
// human tools are async requests to a human
// Union alias of the two human-facing step classes declared below.
type HumanTools = ClarificationRequest | DoneForNow

// Step for asking the user a follow-up question. Note that the @description
// here is attached to the `intent` field, not to `message`.
class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

// Step that ends the current run; `message` is described to the model as the
// text to send to the user about the work that was done.
class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done. 
  "#)
}

// Asks the model (openai/gpt-4o) to choose the next step for a conversation.
// After the output-format instructions the prompt adds a reasoning scaffold:
// the model is told to list its thoughts as bullets before emitting the
// structured step. The `{...} // schema` line is literal prompt text inside
// the raw string, not a BAML comment.
// NOTE(review): `CalculatorTools` is not declared in this file; presumably it
// comes from a sibling .baml file (e.g. tool_calculator.baml) — confirm.
function DetermineNextStep(
    thread: string 
) -> HumanTools | CalculatorTools {
    client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}

        Always think about what to do next first, like:

        - ...
        - ...
        - ...

        {...} // schema
    "#
}

// Fixtures in these tests use the tag-delimited thread format (one
// <type-or-intent> element per event) instead of JSON.

// A bare greeting carries no task; this test expects the model to ask the
// user for more information rather than declare itself done.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// A single multiplication request must yield a multiply step (operands are
// not checked here).
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
        can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

// Full three-step history (multiply, divide, add) with all tool responses
// already present; the model must finish and mention the final answer "18".
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
         <user_input>
    can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
    </user_input>


    <multiply>
    a: 3
    b: 4
    </multiply>


    <tool_response>
    12
    </tool_response>


    <divide>
    a: 12
    b: 2
    </divide>


    <tool_response>
    6
    </tool_response>


    <add>
    a: 6
    b: 12
    </add>


    <tool_response>
    18
    </tool_response>

    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}


// The second operand is gibberish, so the model is expected to ask the user
// for clarification instead of guessing.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
          <user_input>
          can you multiply 3 and fe1iiaff10
          </user_input>
      "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// After the user answers the clarification question with "12", the model
// should resume the original request and multiply 3 by 12.
// Each assertion label names the field it actually checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
        <user_input>
        can you multiply 3 and FD*(#F&& ?
        </user_input>

        <request_more_information>
        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
        </request_more_information>

        <human_response>
        lets try 12 instead
        </human_response>
      "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
        

================================================
FILE: workshops/2025-07-16/walkthrough/08-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';

// Minimal HTTP front end for the agent: one endpoint starts a thread and runs
// the agent loop to completion; thread lookup is a placeholder.
const app = express();
app.use(express.json());
app.set('json spaces', 2);

// POST /thread - Start new thread
// Body: { "message": string }. Responds with whatever agentLoop resolves to.
app.post('/thread', async (req, res) => {
    // Reject requests without a usable message instead of sending
    // `undefined` to the model.
    const message = req.body?.message;
    if (typeof message !== "string" || message.trim() === "") {
        res.status(400).json({ error: "Request body must include a non-empty string `message`" });
        return;
    }

    // Express 4 does not forward a rejected promise from an async handler to
    // its error middleware, so failures are turned into a 500 here; otherwise
    // the request would never receive a response.
    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);
        const result = await agentLoop(thread);
        res.json(result);
    } catch (err) {
        console.error("agent loop failed", err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Agent loop failed" });
        }
    }
});

// GET /thread/:id - Get thread status 
app.get('/thread/:id', (req, res) => {
    // optional - add state
    res.status(404).json({ error: "Not implemented yet" });
});

// Starts listening as soon as this module is loaded; PORT overrides the default 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-07-16/walkthrough/09-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);
    
    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);
    
    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({ 
        thread_id: threadId,
        ...newThread 
    });
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or a 404 error body when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});

// POST /thread/:id/response - Handle clarification response
// Appends the posted `message` to the thread as a human_response event,
// resumes the agent loop and responds with the updated thread.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    try {
        thread.events.push({
            type: "human_response",
            data: req.body.message
        });

        // loop until stop event
        const newThread = await agentLoop(thread);

        store.update(req.params.id, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(newThread);
    } catch (err) {
        // A rejection inside an async Express 4 handler is not forwarded to
        // the framework; without this the request would never get a reply.
        console.error("error resuming agent loop", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// Listen on the port from the PORT environment variable, falling back to 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Export the configured app for other modules.
export { app };

================================================
FILE: workshops/2025-07-16/walkthrough/09-state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';


// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/** Keeps threads in a Map keyed by a generated UUID string. */
export class ThreadStore {
    private byId = new Map<string, Thread>();

    /** Saves `thread` under a freshly generated UUID and returns that id. */
    create(thread: Thread): string {
        const freshId = crypto.randomUUID();
        this.byId.set(freshId, thread);
        return freshId;
    }

    /** Looks up the thread saved under `id`; undefined when there is none. */
    get(id: string): Thread | undefined {
        const hit = this.byId.get(id);
        return hit;
    }

    /** Saves `thread` under `id`, replacing any entry already there. */
    update(id: string, thread: Thread): void {
        this.byId.set(id, thread);
    }
}

================================================
FILE: workshops/2025-07-16/walkthrough/10-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/** One entry in a thread's history. */
export interface Event {
    /** Event category, e.g. "user_input", "tool_call", "tool_response". */
    type: string
    /** Payload: a primitive for some events, an object (possibly with `intent`) for others. */
    data: any;
}

/** An ordered list of events plus helpers to serialize and inspect it. */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Serializes every event and joins the results with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strips leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as a tag-wrapped block. The tag is the payload's
     * `intent` when present, otherwise the event type. Object payloads are
     * listed as `key: value` lines (excluding `intent`); anything else is
     * interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so null must be excluded explicitly or
        // Object.keys would throw on it.
        const body = (typeof e.data !== 'object' || e.data === null)
            ? e.data
            : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n");
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /** True when the last event's intent asks the human for input or signals completion. */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        // Optional chaining: an empty thread or a null/undefined payload is
        // simply "not awaiting" rather than a TypeError.
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /** True when the last event is a `divide` call, which is held for approval. */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and appends the numeric result to the
 * thread as a "tool_response" event.
 *
 * @param nextStep tool call whose `intent` selects the operation on `a` and `b`
 * @param thread   thread to append the result to (mutated in place)
 * @returns the same thread instance
 * @throws Error when `intent` is not one of the four calculator operations;
 *         previously such input fell through and resolved to `undefined`
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed input, but callers may pass untyped
            // event payloads at runtime.
            throw new Error(`unknown tool: ${(nextStep as any)?.intent}`);
    }

    // Every operation reports its result the same way.
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Drives the agent: asks the model for the next step, records it on the
 * thread as a "tool_call" event, and either executes it or hands control
 * back to the caller.
 *
 * Returns the thread when the step is `done_for_now`,
 * `request_more_information` or `divide`; `add`, `subtract` and `multiply`
 * are executed via handleNextStep and the loop continues.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {

    for (;;) {
        const step = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", step);

        thread.events.push({ "type": "tool_call", "data": step });

        // response to human, return the thread
        if (step.intent === "done_for_now" || step.intent === "request_more_information") {
            return thread;
        }

        // divide is scary, return it for human approval
        if (step.intent === "divide") {
            return thread;
        }

        if (step.intent === "add" || step.intent === "subtract" || step.intent === "multiply") {
            thread = await handleNextStep(step, thread);
        }
    }
}


================================================
FILE: workshops/2025-07-16/walkthrough/10-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';

// Create the Express application that serves the agent's HTTP endpoints.
const app = express();
// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Store the routes below use to save threads and look them up by id.
const store = new ThreadStore();

// POST /thread - Start new thread
// Seeds a thread with the posted `message`, stores it, runs the agent loop
// and responds with the thread plus its id.
app.post('/thread', async (req, res) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);

        // Register the thread first so it has an id, then persist whatever
        // the loop returns under that same id.
        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // A rejection inside an async Express 4 handler is not forwarded to
        // the framework; without this the request would never get a reply.
        console.error("error running agent loop", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or a 404 error body when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});


// Request body that approves or denies the tool call a thread is waiting on.
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    // Optional feedback; the response route quotes it in the denial message.
    comment?: string;
}

// Request body that answers a question the agent asked.
type ResponsePayload = {
    type: "response";
    response: string;
}

// Either body shape, discriminated by the `type` field.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Applies a human reply (an answer, an approval or a denial) to the thread,
// resumes the agent loop and responds with the updated thread. Replies that
// do not match what the thread is waiting for get a 400.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    try {
        let lastEvent = thread.events[thread.events.length - 1];

        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; `comment` is optional, so fall
            // back to an empty string instead of the text "undefined"
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment ?? ""}"`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const newThread = await agentLoop(thread);

        store.update(req.params.id, newThread);

        lastEvent = newThread.events[newThread.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(newThread);
    } catch (err) {
        // A rejection inside an async Express 4 handler is not forwarded to
        // the framework; without this the request would never get a reply.
        console.error("error resuming agent loop", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// Listen on the port from the PORT environment variable, falling back to 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Export the configured app for other modules.
export { app };

================================================
FILE: workshops/2025-07-16/walkthrough/11-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: joins the arguments into one message, runs the
 * agent loop on it and keeps asking a human for input until the agent's
 * last step is `done_for_now`, then prints that step's message and exits.
 * Exits with status 1 when no arguments are given.
 */
export async function cli() {
    // argv[0] and argv[1] are node and the script name; the rest is the message
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // The whole command line becomes the thread's first event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let current = await agentLoop(thread);
    let tail = current.events[current.events.length - 1];

    // Each pass: get the human's reply, append it, run the loop again
    for (; tail.data.intent !== "done_for_now"; tail = current.events[current.events.length - 1]) {
        const reply = await askHuman(tail);
        thread.events.push(reply);
        current = await agentLoop(thread);
    }

    // print the final result
    // optional - you could loop here too
    console.log(tail.data.message);
    process.exit(0);
}

// Routes a question to the human: via HumanLayer email when
// HUMANLAYER_API_KEY is set, otherwise via an interactive terminal prompt.
async function askHuman(lastEvent: Event): Promise<Event> {
    const useEmail = Boolean(process.env.HUMANLAYER_API_KEY);
    if (!useEmail) {
        return await askHumanCLI(lastEvent.data.message);
    }
    return await askHumanEmail(lastEvent);
}

// Prompts on the terminal with `message` and resolves to a "human_response"
// event carrying the line the user typed.
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // Close the interface once answered; a new one is created per
            // prompt, so leaving it open stacks listeners on stdin.
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Asks a human over email (via HumanLayer) to approve the pending `divide`
 * call and turns the outcome into a "tool_response" event: the quotient when
 * approved, a denial message with the approver's feedback otherwise.
 *
 * @throws Error when HUMANLAYER_EMAIL is not set, or when the event's intent
 *         is anything other than "divide"
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    }) 

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            // A denial may come without a comment; don't feed the literal
            // text "undefined" back to the model.
            const feedback = response.comment ?? "(no comment provided)";
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}
                with feedback: ${feedback}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-07-16/walkthrough/11b-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: joins the arguments into one message, runs the
 * agent loop on it and keeps asking a human for input until the agent's
 * last step is `done_for_now`, then prints that step's message and exits.
 * Exits with status 1 when no arguments are given.
 */
export async function cli() {
    // argv[0] and argv[1] are node and the script name; the rest is the message
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // The whole command line becomes the thread's first event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let current = await agentLoop(thread);
    let tail = current.events[current.events.length - 1];

    // Each pass: get the human's reply, append it, run the loop again
    for (; tail.data.intent !== "done_for_now"; tail = current.events[current.events.length - 1]) {
        const reply = await askHuman(tail);
        thread.events.push(reply);
        current = await agentLoop(thread);
    }

    // print the final result
    // optional - you could loop here too
    console.log(tail.data.message);
    process.exit(0);
}

// Routes a question to the human: via HumanLayer email when
// HUMANLAYER_API_KEY is set, otherwise via an interactive terminal prompt.
async function askHuman(lastEvent: Event): Promise<Event> {
    const useEmail = Boolean(process.env.HUMANLAYER_API_KEY);
    if (!useEmail) {
        return await askHumanCLI(lastEvent.data.message);
    }
    return await askHumanEmail(lastEvent);
}

// Prompts on the terminal with `message` and resolves to a "human_response"
// event carrying the line the user typed.
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // Close the interface once answered; a new one is created per
            // prompt, so leaving it open stacks listeners on stdin.
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contacts a human over email (via HumanLayer) and turns the reply into a
 * "tool_response" event.
 *
 * - `request_more_information`: sends the agent's message and returns the
 *   human's reply.
 * - `divide`: requests approval; returns the quotient when approved, a
 *   denial message with the approver's feedback otherwise.
 *
 * @throws Error when HUMANLAYER_EMAIL is not set, or for any other intent
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    }) 

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // NOTE(review): the terminal prompt path records replies as
        // "human_response"; confirm "tool_response" is intended here.
        return {
            "type": "tool_response",
            "data": response
        }
    }
    
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            // A denial may come without a comment; don't feed the literal
            // text "undefined" back to the model.
            const feedback = response.comment ?? "(no comment provided)";
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}
                with feedback: ${feedback}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-07-16/walkthrough/11c-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: joins the arguments into one message, runs the
 * agent loop on it and keeps asking a human for input until the agent's
 * last step is `done_for_now`, then prints that step's message and exits.
 * Exits with status 1 when no arguments are given.
 */
export async function cli() {
    // argv[0] and argv[1] are node and the script name; the rest is the message
    const words = process.argv.slice(2);

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // The whole command line becomes the thread's first event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let current = await agentLoop(thread);
    let tail = current.events[current.events.length - 1];

    // Each pass: get the human's reply, append it, run the loop again
    for (; tail.data.intent !== "done_for_now"; tail = current.events[current.events.length - 1]) {
        const reply = await askHuman(tail);
        thread.events.push(reply);
        current = await agentLoop(thread);
    }

    // print the final result
    // optional - you could loop here too
    console.log(tail.data.message);
    process.exit(0);
}

// Routes a question to the human: via HumanLayer email when
// HUMANLAYER_API_KEY is set, otherwise via an interactive terminal prompt.
async function askHuman(lastEvent: Event): Promise<Event> {
    const useEmail = Boolean(process.env.HUMANLAYER_API_KEY);
    if (!useEmail) {
        return await askHumanCLI(lastEvent.data.message);
    }
    return await askHumanEmail(lastEvent);
}

// Prompts on the terminal with `message` and resolves to a "human_response"
// event carrying the line the user typed.
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // Close the interface once answered; a new one is created per
            // prompt, so leaving it open stacks listeners on stdin.
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contacts a human over email (via HumanLayer, using a custom email body
 * template) and turns the reply into a "tool_response" event.
 *
 * - `request_more_information`: sends the agent's message and returns the
 *   human's reply.
 * - `divide`: requests approval; returns the quotient when approved, a
 *   denial message with the approver's feedback otherwise.
 *
 * @throws Error when HUMANLAYER_EMAIL is not set, or for any other intent
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
<br><br>
reply to this email to approve
{% endif %}`
            }
        }
    }) 

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // NOTE(review): the terminal prompt path records replies as
        // "human_response"; confirm "tool_response" is intended here.
        return {
            "type": "tool_response",
            "data": response
        }
    }
    
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            // A denial may come without a comment; don't feed the literal
            // text "undefined" back to the model.
            const feedback = response.comment ?? "(no comment provided)";
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}
                with feedback: ${feedback}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-07-16/walkthrough/12-1-server-init.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer } from 'humanlayer';

// Create the Express application that serves the agent's HTTP endpoints.
const app = express();
// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Store the routes below use to save threads and look them up by id.
const store = new ThreadStore();

// Builds a HumanLayer client whose contact channel is the email address in
// HUMANLAYER_EMAIL. Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is unset
// (the email variable is checked first).
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: emailAddress, HUMANLAYER_API_KEY: apiKey } = process.env;

    if (!emailAddress) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address: emailAddress } };
    return humanlayer({ runId: `12fa-agent`, contactChannel });
}

// POST /thread - Start new thread
// Seeds a thread with the posted `message`, stores it, runs the agent loop
// and responds with the thread plus its id.
app.post('/thread', async (req, res) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);

        // Register the thread first so it has an id, then persist whatever
        // the loop returns under that same id.
        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // A rejection inside an async Express 4 handler is not forwarded to
        // the framework; without this the request would never get a reply.
        console.error("error running agent loop", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or a 404 error body when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});


// Request body that approves or denies the tool call a thread is waiting on.
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    // Optional feedback; the response route quotes it in the denial message.
    comment?: string;
}

// Request body that answers a question the agent asked.
type ResponsePayload = {
    type: "response";
    response: string;
}

// Either body shape, discriminated by the `type` field.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Applies a human reply (an answer, an approval or a denial) to the thread,
// resumes the agent loop and responds with the updated thread. Replies that
// do not match what the thread is waiting for get a 400.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    try {
        let lastEvent = thread.events[thread.events.length - 1];

        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; `comment` is optional, so fall
            // back to an empty string instead of the text "undefined"
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment ?? ""}"`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const result = await agentLoop(thread);

        store.update(req.params.id, result);

        lastEvent = result.events[result.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(result);
    } catch (err) {
        // A rejection inside an async Express 4 handler is not forwarded to
        // the framework; without this the request would never get a reply.
        console.error("error resuming agent loop", err);
        res.status(500).json({ error: "Internal server error" });
    }
});

// Listen on the port from the PORT environment variable, falling back to 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Export the configured app for other modules.
export { app };

================================================
FILE: workshops/2025-07-16/walkthrough/12-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

// Create the Express application that serves the agent's HTTP endpoints.
const app = express();
// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Store the routes below use to save threads and look them up by id.
const store = new ThreadStore();


// Builds a HumanLayer client whose contact channel is the email address in
// HUMANLAYER_EMAIL. Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is unset
// (the email variable is checked first).
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: emailAddress, HUMANLAYER_API_KEY: apiKey } = process.env;

    if (!emailAddress) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address: emailAddress } };
    return humanlayer({ runId: `12fa-agent`, contactChannel });
}
// POST /thread - Start new thread
// Stores a thread seeded with the posted `message`, starts the agent loop in
// the background and responds immediately. When the loop stops on a step that
// needs a human reply, a HumanLayer contact carrying the thread id is created.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Register the thread before responding so the caller receives an id it
    // can use with GET /thread/:id while the loop is still running.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - does not wait for the human's reply;
            // awaited so a failed request is reported by the catch below
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // Nothing awaits this background work, so failures must be handled
        // here or they become unhandled promise rejections.
        console.error("error processing thread", threadId, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or a 404 error body when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});

// Type the /webhook route casts its request body to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// Placeholder: the body is empty and no route visible here references it.
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - Receive a completed human contact
// Validates the payload and the target thread, appends the human's reply to
// the thread, acknowledges the webhook, and resumes the agent loop in the
// background.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // Guard against bodies that are not the expected webhook shape; reading
    // nested fields from them would otherwise throw inside this handler.
    if (!response?.event) {
        return res.status(400).json({ error: "Invalid webhook payload" });
    }

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec?.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // Record the reply so the next loop iteration sees it.
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });

    // Resume the loop in the background, mirroring POST /thread, so the
    // webhook sender is acknowledged without waiting on the model.
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        if (newThread.awaitingHumanResponse()) {
            const lastEvent = newThread.events[newThread.events.length - 1];
            await getHumanlayer().createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        console.error("error resuming thread", threadId, err);
    });

    // Previously the success path never replied, leaving the request open.
    res.json({ status: "ok" });
});

// Listen on the port from the PORT environment variable, falling back to 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Export the configured app for other modules.
export { app };

================================================
FILE: workshops/2025-07-16/walkthrough/12a-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

// Create the Express application that serves the agent's HTTP endpoints.
const app = express();
// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Store the routes below use to save threads and look them up by id.
const store = new ThreadStore();

// Builds a HumanLayer client whose contact channel is the email address in
// HUMANLAYER_EMAIL. Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is unset
// (the email variable is checked first).
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: emailAddress, HUMANLAYER_API_KEY: apiKey } = process.env;

    if (!emailAddress) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address: emailAddress } };
    return humanlayer({ runId: `12fa-agent`, contactChannel });
}
// POST /thread - Start new thread
// Stores a thread seeded with the posted `message`, starts the agent loop in
// the background and responds immediately. When the loop stops on a step that
// needs a human reply, a HumanLayer contact carrying the thread id is created.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Register the thread before responding so the caller receives an id it
    // can use with GET /thread/:id while the loop is still running.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - does not wait for the human's reply;
            // awaited so a failed request is reported by the catch below
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // Nothing awaits this background work, so failures must be handled
        // here or they become unhandled promise rejections.
        console.error("error processing thread", threadId, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
// Responds with the stored thread, or a 404 error body when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});

// Type the /webhook route casts its request body to.
type WebhookResponse = V1Beta2HumanContactCompleted;

// Placeholder: the body is empty and no route visible here references it.
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - Receive a completed human contact
// Validates the payload and the target thread, appends the human's reply to
// the thread, acknowledges the webhook, and resumes the agent loop in the
// background.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // Guard against bodies that are not the expected webhook shape; reading
    // nested fields from them would otherwise throw inside this handler.
    if (!response?.event) {
        return res.status(400).json({ error: "Invalid webhook payload" });
    }

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec?.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // Record the reply so the next loop iteration sees it.
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });

    // Resume the loop in the background, mirroring POST /thread, so the
    // webhook sender is acknowledged without waiting on the model.
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        if (newThread.awaitingHumanResponse()) {
            const lastEvent = newThread.events[newThread.events.length - 1];
            await getHumanlayer().createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        console.error("error resuming thread", threadId, err);
    });

    // Previously the success path never replied, leaving the request open.
    res.json({ status: "ok" });
});

// Listen on the port from the PORT environment variable, falling back to 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

// Export the configured app for other modules.
export { app };

================================================
FILE: workshops/2025-07-16/walkthrough/12aa-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

// Create the Express application that serves the agent's HTTP endpoints.
const app = express();
// Parse JSON request bodies so handlers can read req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Store the routes below use to save threads and look them up by id.
const store = new ThreadStore();


// Builds a HumanLayer client whose contact channel is the email address in
// HUMANLAYER_EMAIL. Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is unset
// (the email variable is checked first).
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: emailAddress, HUMANLAYER_API_KEY: apiKey } = process.env;

    if (!emailAddress) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = { email: { address: emailAddress } };
    return humanlayer({ runId: `12fa-agent`, contactChannel });
}
// POST /thread - Start new thread
//
// Registers a new thread seeded with the caller's message, replies
// immediately with its id, then runs the agent loop in the background.
// If the loop stops because the agent is waiting on a human, a HumanLayer
// contact is created carrying the thread id in its state, so that the
// webhook can route the answer back to this thread.
//
// Response: 200 { status: "processing", thread_id }
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Create the thread up front so the id can be handed back to the client;
    // without it the caller has no way to poll GET /thread/:id.
    const threadId = store.create(thread);

    // Reply before doing any slow work - the agent loop runs after this.
    res.json({ status: "processing", thread_id: threadId });

    // Background work. The response has already been sent, so failures can
    // only be logged; catching them here prevents an unhandled rejection.
    try {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        // Inspect the thread returned by the loop, not the input thread.
        if (newThread.awaitingHumanResponse()) {
            const lastEvent = newThread.events[newThread.events.length - 1];
            const hl = getHumanlayer();
            // Awaited so that a failed request surfaces in the catch below.
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    } catch (err) {
        console.error(`agent loop failed for thread ${threadId}`, err);
    }
});

// GET /thread/:id - Return the stored thread as JSON, or 404 if the id is unknown
app.get('/thread/:id', (req: Request, res: Response) => {
    const { id } = req.params;
    const found = store.get(id);

    if (found) {
        res.json(found);
    } else {
        res.status(404).json({ error: "Thread not found" });
    }
});

// Shape of the JSON body this server expects on its HumanLayer webhook:
// the "human contact completed" event type exported by the humanlayer SDK.
type WebhookResponse = V1Beta2HumanContactCompleted;

// NOTE(review): placeholder - this handler has an empty body and is not
// referenced by any route in this file. Either implement it or remove it
// once the webhook flow is finished.
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - Receive a completed human contact from HumanLayer
//
// Looks up the thread whose id was stashed in the contact's `spec.state`,
// records the human's answer as a "human_response" event, acknowledges the
// webhook, and then resumes the agent loop in the background. If the agent
// stops on another question, a new human contact is created for it.
//
// Responses:
//   200 { status, thread_id }  - answer recorded, agent resuming
//   400 { error }              - no thread id in the payload, or the thread
//                                is not currently waiting on a human
//   404 { error }              - unknown thread id
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // Optional chaining all the way down: a malformed body must produce a
    // 400 rather than throw inside an async handler with no reply sent.
    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    // Record the answer so the next agent turn can see it.
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });
    store.update(threadId, thread);

    // Acknowledge right away; previously the success path never replied and
    // the sender was left waiting until its own timeout.
    res.json({ status: "ok", thread_id: threadId });

    // Resume the agent after replying. Failures can only be logged at this
    // point, but catching them avoids an unhandled promise rejection.
    try {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        if (newThread.awaitingHumanResponse()) {
            // The agent asked a follow-up question: open a new contact that
            // routes back to the same thread.
            const lastEvent = newThread.events[newThread.events.length - 1];
            await getHumanlayer().createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    } catch (err) {
        console.error(`failed to resume thread ${threadId}`, err);
    }
});

// Listen on $PORT when it is set, otherwise fall back to 3000.
const port = process.env.PORT || 3000;
app.listen(port, function onListening() {
    console.log(`Server running on port ${port}`);
});

// Exported so other modules can import the configured Express app.
export { app };

================================================
FILE: workshops/2025-07-16/walkthrough/12b-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { V1Beta2EmailEventReceived, V1Beta2FunctionCallCompleted, V1Beta2HumanContactCompleted } from 'humanlayer';

// Express application shared by every route in this file.
const app = express();
// Parse JSON request bodies into req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);

// Thread storage used by the routes below via create / get / update.
const store = new ThreadStore();

// POST /thread - Start new thread
//
// Seeds a thread with the caller's message, runs the agent loop until it
// stops, and returns the resulting thread together with its id. The last
// event is annotated with a `response_url` the client can POST to in order
// to continue the conversation.
//
// Responses:
//   200 { thread_id, ...thread }
//   500 { error }  - the agent loop (or anything else in here) threw
app.post('/thread', async (req, res) => {
    // An async handler that rejects is not guaranteed to produce a reply
    // (Express 4 does not catch it), so failures are turned into a 500 here.
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);

        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        console.error("error handling POST /thread", err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Internal server error" });
        }
    }
});

// GET /thread/:id - Return the stored thread as JSON, or 404 if the id is unknown
app.get('/thread/:id', (req, res) => {
    const { id } = req.params;
    const found = store.get(id);

    if (found) {
        res.json(found);
    } else {
        res.status(404).json({ error: "Thread not found" });
    }
});


// Body sent to POST /thread/:id/response when a human approves or denies
// the pending operation. `comment` is optional free-text feedback.
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

// Body sent to POST /thread/:id/response when a human answers a question
// the agent asked.
type ResponsePayload = {
    type: "response";
    response: string;
}

// Discriminated union of the accepted request bodies; `type` is the tag.
type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
//
// Continues a paused thread with input from a human, then runs the agent
// loop again until its next stopping point.
//
// Accepted bodies (see Payload):
//   { type: "response", response }          - when the thread awaits an answer
//   { type: "approval", approved, comment? } - when the thread awaits approval
//
// Responses:
//   200 thread   - the updated thread; its last event carries `response_url`
//   400 { error, awaitingHumanResponse, awaitingHumanApproval }
//                - body type does not match what the thread is waiting for
//   404 { error } - unknown thread id
//   500 { error } - running the tool or the agent loop threw
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    // An async handler that rejects is not guaranteed to produce a reply
    // (Express 4 does not catch it), so failures are turned into a 500 here.
    try {
        if (thread.awaitingHumanResponse() && body?.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body?.type === 'approval') {
            if (body.approved) {
                // approved, run the tool, pushing results onto the thread
                const pendingEvent = thread.events[thread.events.length - 1];
                await handleNextStep(pendingEvent.data, thread);
            } else {
                // push feedback onto the thread; `comment` is optional, so
                // avoid interpolating the literal text "undefined"
                const denial = body.comment
                    ? `user denied the operation with feedback: "${body.comment}"`
                    : "user denied the operation";
                thread.events.push({
                    type: "tool_response",
                    data: denial
                });
            }
        } else {
            res.status(400).json({
                error: "Invalid request: " + body?.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const result = await agentLoop(thread);

        store.update(req.params.id, result);

        const lastEvent = result.events[result.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json(result);
    } catch (err) {
        console.error(`error handling response for thread ${req.params.id}`, err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Internal server error" });
        }
    }
});

// Shape of the JSON body expected on the webhook: the "human contact
// completed" event type exported by the humanlayer SDK.
type WebhookResponse = V1Beta2HumanContactCompleted;

// POST /webhook/response - Receive a completed human contact from HumanLayer
//
// Looks up the thread whose id was stashed in the contact's `spec.state`,
// records the human's answer as a "human_response" event, acknowledges the
// webhook, and then resumes the agent loop in the background (mirroring
// what POST /thread/:id/response does after pushing an event).
//
// Responses:
//   200 { status, thread_id }  - answer recorded, agent resuming
//   400 { error }              - no thread id in the payload, or the thread
//                                is not currently waiting on a human
//   404 { error }              - unknown thread id
app.post('/webhook/response', async (req, res) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // Optional chaining all the way down: a malformed body must produce a
    // 400 rather than throw inside an async handler with no reply sent.
    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    thread.events.push({
        type: "human_response",
        data: humanResponse
    });
    store.update(threadId, thread);

    // Acknowledge right away; previously the handler never replied and the
    // sender was left waiting until its own timeout.
    res.json({ status: "ok", thread_id: threadId });

    // Resume the agent after replying. Failures can only be logged at this
    // point, but catching them avoids an unhandled promise rejection.
    // NOTE(review): if the agent stops on another question, nothing in this
    // file notifies a human; the thread stays readable via GET /thread/:id.
    try {
        const result = await agentLoop(thread);
        store.update(threadId, result);
    } catch (err) {
        console.error(`failed to resume thread ${threadId}`, err);
    }
});

// Listen on $PORT when it is set, otherwise fall back to 3000.
const port = process.env.PORT || 3000;
app.listen(port, function onListening() {
    console.log(`Server running on port ${port}`);
});

// Exported so other modules can import the configured Express app.
export { app };

================================================
FILE: workshops/2025-07-16/walkthrough.yaml
================================================
title: "Building the 12-factor agent template from scratch in Python"
text: "Steps to start from a bare Python repo and build up a 12-factor agent. This walkthrough will guide you through creating a Python agent that follows the 12-factor methodology with BAML."

targets:
  - ipynb: "./build/workshop-2025-07-16.ipynb"

sections:
  - name: hello-world
    title: "Chapter 0 - Hello World"
    text: "Let's start with a basic Python setup and a hello world program."
    steps:
      - text: |
          This guide will walk you through building agents in Python with BAML.

          We'll start simple with a hello world program and gradually build up to a full agent.

          For this notebook, you'll need to have your OpenAI API key saved in Google Colab secrets.

          ## Where We're Headed

          Before we dive in, let's understand the journey ahead. We're building toward **micro-agents in deterministic DAGs** - a powerful pattern that combines the flexibility of AI with the reliability of traditional software.

          📖 **Learn more**: [A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md)

          ![Software DAG Evolution](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/010-software-dag.png)

      - text: "Here's our simple hello world program:"
      - file: {src: ./walkthrough/00-main.py}
      - text: "Let's run it to verify it works:"
      - run_main: {regenerate_baml: false}

  - name: cli-and-agent
    title: "Chapter 1 - CLI and Agent Loop"
    text: "Now let's add BAML and create our first agent with a CLI interface."
    steps:
      - text: |
          In this chapter, we'll integrate BAML to create an AI agent that can respond to user input.

          ## What is BAML?

          BAML (Boundary Markup Language) is a domain-specific language designed to help developers build reliable AI workflows and agents. Created by [BoundaryML](https://www.boundaryml.com/) (a Y Combinator W23 company), BAML adds the engineering to prompt engineering.

          ### Why BAML?

          - **Type-safe outputs**: Get fully type-safe outputs from LLMs, even when streaming
          - **Language agnostic**: Works with Python, TypeScript, Ruby, Go, and more
          - **LLM agnostic**: Works with any LLM provider (OpenAI, Anthropic, etc.)
          - **Better performance**: State-of-the-art structured outputs that outperform even OpenAI's native function calling
          - **Developer-friendly**: Native VSCode extension with syntax highlighting, autocomplete, and interactive playground

          ### Learn More

          - 📚 [Official Documentation](https://docs.boundaryml.com/home)
          - 💻 [GitHub Repository](https://github.com/BoundaryML/baml)
          - 🎯 [What is BAML?](https://docs.boundaryml.com/guide/introduction/what-is-baml)
          - 📖 [BAML Examples](https://github.com/BoundaryML/baml-examples)
          - 🏢 [Company Website](https://www.boundaryml.com/)
          - 📰 [Blog: AI Agents Need a New Syntax](https://www.boundaryml.com/blog/ai-agents-need-new-syntax)

          BAML turns prompt engineering into schema engineering, where you focus on defining the structure of your data rather than wrestling with prompts. This approach leads to more reliable and maintainable AI applications.

          ### Note on Developer Experience

          BAML works much better in VS Code with their official extension, which provides syntax highlighting, autocomplete, inline testing, and an interactive playground. However, for this notebook tutorial, we'll work with BAML files directly without the enhanced IDE features.

          ## Factor 1: Natural Language to Tool Calls

          What we're building implements the first factor of 12-factor agents - converting natural language into structured tool calls.

          📖 **Learn more**: [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)

          ![Natural Language to Tool Calls](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/110-natural-language-tool-calls.png)

          First, let's set up BAML support in our notebook.
      - baml_setup: true
      - command: "!ls baml_src"
      - text: |
          Now let's create our agent that will use BAML to process user input.

          First, we'll define the core agent logic:
      - file: {src: ./walkthrough/01-agent.py}
      - text: |
          Next, we need to define the BAML function that our agent will use.

          ### Understanding BAML Syntax

          BAML files define:
          - **Classes**: Structured output schemas (like `DoneForNow` below)
          - **Functions**: AI-powered functions that take inputs and return structured outputs
          - **Tests**: Example inputs/outputs to validate your prompts

          This BAML file defines what our agent can do:
      - fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
      - command: "!ls baml_src"
      - text: |
          Now let's create our main function that accepts a message parameter:
      - file: {src: ./walkthrough/01-main.py}
      - text: |
          Let's test our agent! Try calling main() with different messages:
          - `main("What's the weather like?")`
          - `main("Tell me a joke")`
          - `main("How are you doing today?")`

          In this case, we'll use the baml_generate function to
          generate the Pydantic and Python bindings from our
          BAML source. In later steps we'll skip this, since it
          is done automatically by the get_baml_client() function.

      - run_main: {regenerate_baml: true, args: "Hello from the Python notebook!"}

  - name: calculator-tools
    title: "Chapter 2 - Add Calculator Tools"
    text: "Let's add some calculator tools to our agent."
    steps:
      - text: |
          Let's start by adding a tool definition for the calculator.

          These are simple structured outputs that we'll ask the model to
          return as a "next step" in the agentic loop.

          ## Factor 4: Tools Are Structured Outputs

          This chapter demonstrates that tools are just structured JSON outputs from the LLM - nothing more complex!

          📖 **Learn more**: [Factor 4: Tools Are Structured Outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)

          ![Tools Are Structured Outputs](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/140-tools-are-just-structured-outputs.png)

      - fetch_file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml}
      - command: "!ls baml_src"
      - text: |
          Now, let's update the agent's DetermineNextStep method to
          expose the calculator tools as potential next steps.

      - fetch_file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Now let's update our main function to show the tool call:
      - file: {src: ./walkthrough/02-main.py}
      - text: |
          Let's try out the calculator! The agent should recognize that you want to perform a calculation
          and return the appropriate tool call instead of just a message.
      - run_main: {regenerate_baml: false, args: "can you add 3 and 4"}

  - name: tool-loop
    title: "Chapter 3 - Process Tool Calls in a Loop"
    text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
    steps:
      - text: |
          In this chapter, we'll enhance our agent to process tool calls in a loop. This means:
          - The agent can call multiple tools in sequence
          - Each tool result is fed back to the agent
          - The agent continues until it has a final answer

          ## The Agent Loop Pattern

          We're implementing the core agent loop - where the AI makes decisions, executes tools, and continues until done.

          ![Agent Loop Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/027-agent-loop-animation.gif)

          ## Factor 5: Unify Execution State

          Notice how we're storing everything as events in our Thread - this is Factor 5 in action!

          📖 **Learn more**: [Factor 5: Unify Execution State](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)

          ![Unify State Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/155-unify-state-animation.gif)

          Let's update our agent to handle tool calls properly:
      - file: {src: ./walkthrough/03-agent.py}
      - text: |
          Now let's update our main function to use the new agent loop:
      - file: {src: ./walkthrough/03-main.py}
      - text: |
          Let's try it out! The agent should now call the tool and return the calculated result:
      - run_main: {regenerate_baml: false, args: "can you add 3 and 4"}
      - text: |
          You should see the agent:
          1. Recognize it needs to use the add tool
          2. Call the tool with the correct parameters
          3. Get the result (7)
          4. Generate a final response incorporating the result

          For more complex calculations, we need to handle all calculator operations. Let's add support for subtract, multiply, and divide:
      - file: {src: ./walkthrough/03b-agent.py}
      - text: |
          Now let's test subtraction:
      - run_main: {regenerate_baml: false, args: "can you subtract 3 from 4"}
      - text: |
          Test multiplication:
      - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4"}
      - text: |
          Finally, let's test a complex multi-step calculation:
      - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result"}
      - text: |
          Congratulations! You've taken your first step into hand-rolling an agent loop.

          Key concepts you've learned:
          - **Thread Management**: Tracking conversation history and tool calls
          - **Tool Execution**: Processing different tool types and returning results
          - **Agent Loop**: Continuing until the agent has a final answer

          From here, we'll start incorporating more intermediate and advanced concepts for 12-factor agents.

  - name: baml-tests
    title: "Chapter 4 - Add Tests to agent.baml"
    text: "Let's add some tests to our BAML agent."
    steps:
      - text: |
          In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly.

          ## Why Test BAML Functions?

          - **Catch regressions**: Ensure changes don't break existing behavior
          - **Document behavior**: Tests serve as living documentation
          - **Validate edge cases**: Test complex scenarios and conversation flows
          - **CI/CD integration**: Run tests automatically in your pipeline

          Let's start with a simple test that checks the agent's ability to handle basic interactions:
      - fetch_file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Run the tests to see them in action:
      - command: "!baml-cli test"
      - text: |
          Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output.

          ## BAML Assertion Syntax

          Assertions use the `@@assert` directive:
          ```
          @@assert(name, {{condition}})
          ```

          - `name`: A descriptive name for the assertion
          - `condition`: A boolean expression using `this` to access the output
      - fetch_file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Run the tests again to see assertions in action:
      - command: "!baml-cli test"
      - text: |
          Finally, let's add more complex test cases that test multi-step conversations.

          These tests simulate an entire conversation flow, including:
          - User input
          - Tool calls made by the agent
          - Tool responses
          - Final agent response
      - fetch_file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Run the comprehensive test suite:
      - command: "!baml-cli test"
      - text: |
          ## Key Testing Concepts

          1. **Test Structure**: Each test specifies functions, arguments, and assertions
          2. **Progressive Testing**: Start simple, then test complex scenarios
          3. **Conversation History**: Test how the agent handles multi-turn conversations
          4. **Tool Integration**: Verify the agent correctly uses tools in sequence

          With these tests in place, you can confidently modify your agent knowing that core functionality is protected by automated tests!

  - name: human-tools
    title: "Chapter 5 - Multiple Human Tools"
    text: |
      In this section, we'll add support for multiple tools that let the agent contact humans.
    steps:
      - text: |
          So far, our agent only returns a final answer with "done_for_now". But what if the agent needs clarification?

          Let's add a new tool that allows the agent to request more information from the user.

          ## Why Human-in-the-Loop?

          - **Handle ambiguous inputs**: When user input is unclear or contains typos
          - **Request missing information**: When the agent needs more context
          - **Confirm sensitive operations**: Before performing important actions
          - **Interactive workflows**: Build conversational agents that engage users

          First, let's update our BAML file to include a ClarificationRequest tool:
      - fetch_file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Now let's update our agent to handle clarification requests:
      - file: {src: ./walkthrough/05-agent.py}
      - text: |
          Finally, let's create a main function that handles human interaction:
      - file: {src: ./walkthrough/05-main.py}
      - text: |
          Let's test with an ambiguous input that should trigger a clarification request:
      - run_main: {regenerate_baml: false, args: "can you multiply 3 and FD*(#F&&"}
      - text: |
          You should see:
          1. The agent recognizes the input is unclear
          2. It asks for clarification
          3. In Colab, you'll be prompted to type a response
          4. In local testing, an auto-response is provided
          5. The agent continues with the clarified input

          ## Interactive Testing in Colab

          When running in Google Colab, the `input()` function will create an interactive text box where you can type your response. Try different clarifications to see how the agent adapts!

          ## Key Concepts

          - **Human Tools**: Special tool types that return control to the human
          - **Conversation Flow**: The agent can pause execution to get human input
          - **Context Preservation**: The full conversation history is maintained
          - **Flexible Handling**: Different behaviors for different environments

  - name: customize-prompt
    title: "Chapter 6 - Customize Your Prompt with Reasoning"
    text: |
      In this section, we'll explore how to customize the prompt of the agent with reasoning steps.

      This is core to [Factor 2 - Own Your Prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md).
    steps:
      - text: |
          ## Why Add Reasoning to Prompts?

          Adding explicit reasoning steps to your prompts can significantly improve agent performance:

          - **Better decisions**: The model thinks through problems step-by-step
          - **Transparency**: You can see the model's thought process
          - **Fewer errors**: Structured thinking reduces mistakes
          - **Debugging**: Easier to identify where reasoning went wrong

          Let's update our agent prompt to include a reasoning step:
      - fetch_file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Now let's test it with a simple calculation to see the reasoning in action:

      - run_main: {args: "can you multiply 3 and 4"}
      - text: |
          The model uses explicit reasoning steps to think through the problem before making a decision.

          ## Advanced Prompt Engineering

          You can enhance your prompts further by:
          - Adding specific reasoning templates for different tasks
          - Including examples of good reasoning
          - Structuring the reasoning with numbered steps
          - Adding checks for common mistakes

          The key is to guide the model's thinking process while still allowing flexibility.

  - name: context-window
    title: "Chapter 7 - Customize Your Context Window"
    text: |
      In this section, we'll explore how to customize the context window of the agent.

      This is core to [Factor 3 - Own Your Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md).
    steps:
      - text: |
          ## Context Window Serialization

          How you format your conversation history can significantly impact:
          - **Token usage**: Some formats are more efficient
          - **Model understanding**: Clear structure helps the model
          - **Debugging**: Readable formats help development

          Let's implement two serialization formats: pretty-printed JSON and XML.
      - file: {src: ./walkthrough/07-agent.py}
      - text: |
          Now let's create a main function that can switch between formats:
      - file: {src: ./walkthrough/07-main.py}
      - text: |
          Let's test with JSON format first:
      - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: false}}
      - text: |
          Now let's try the same with XML format:
      - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: true}}
      - text: |
          ## XML vs JSON Trade-offs

          **XML Benefits**:
          - More token-efficient for nested data
          - Clear hierarchy with opening/closing tags
          - Better for long conversations

          **JSON Benefits**:
          - Familiar to most developers
          - Easy to parse and debug
          - Native to JavaScript/Python

          Choose based on your specific needs and token constraints!


================================================
FILE: workshops/2025-07-16/walkthrough_python_enhanced.yaml
================================================
title: "Building the 12-factor agent template from scratch in Python"
text: "Steps to start from a bare Python repo and build up a 12-factor agent. This walkthrough will guide you through creating a Python agent that follows the 12-factor methodology with BAML."
targets:
  - ipynb: "./build/workshop-2025-07-16.ipynb"
sections:
  - name: hello-world
    title: "Chapter 0 - Hello World"
    text: "Let's start with a basic Python setup and a hello world program."
    steps:
      - text: |
          This guide will walk you through building agents in Python with BAML.

          We'll start simple with a hello world program and gradually build up to a full agent.

          For this notebook, you'll need to have your OpenAI API key saved in Google Colab secrets.

          ## Where We're Headed

          Before we dive in, let's understand the journey ahead. We're building toward **micro-agents in deterministic DAGs** - a powerful pattern that combines the flexibility of AI with the reliability of traditional software.

          📖 **Learn more**: [A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md)

          ![Software DAG Evolution](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/010-software-dag.png)

      - text: "Here's our simple hello world program:"
      - file: {src: ./walkthrough/00-main.py}
      - text: "Let's run it to verify it works:"
      - run_main: {regenerate_baml: false}

  - name: cli-and-agent
    title: "Chapter 1 - CLI and Agent Loop"
    text: "Now let's add BAML and create our first agent with a CLI interface."
    steps:
      - text: |
          In this chapter, we'll integrate BAML to create an AI agent that can respond to user input.

          ## What is BAML?

          BAML (Boundary Markup Language) is a domain-specific language designed to help developers build reliable AI workflows and agents. Created by [BoundaryML](https://www.boundaryml.com/) (a Y Combinator W23 company), BAML adds the engineering to prompt engineering.

          ### Why BAML?

          - **Type-safe outputs**: Get fully type-safe outputs from LLMs, even when streaming
          - **Language agnostic**: Works with Python, TypeScript, Ruby, Go, and more
          - **LLM agnostic**: Works with any LLM provider (OpenAI, Anthropic, etc.)
          - **Better performance**: State-of-the-art structured outputs that outperform even OpenAI's native function calling
          - **Developer-friendly**: Native VSCode extension with syntax highlighting, autocomplete, and interactive playground

          ### Learn More

          - 📚 [Official Documentation](https://docs.boundaryml.com/home)
          - 💻 [GitHub Repository](https://github.com/BoundaryML/baml)
          - 🎯 [What is BAML?](https://docs.boundaryml.com/guide/introduction/what-is-baml)
          - 📖 [BAML Examples](https://github.com/BoundaryML/baml-examples)
          - 🏢 [Company Website](https://www.boundaryml.com/)
          - 📰 [Blog: AI Agents Need a New Syntax](https://www.boundaryml.com/blog/ai-agents-need-new-syntax)

          BAML turns prompt engineering into schema engineering, where you focus on defining the structure of your data rather than wrestling with prompts. This approach leads to more reliable and maintainable AI applications.

          ### Note on Developer Experience

          BAML works much better in VS Code with their official extension, which provides syntax highlighting, autocomplete, inline testing, and an interactive playground. However, for this notebook tutorial, we'll work with BAML files directly without the enhanced IDE features.

          ## Factor 1: Natural Language to Tool Calls

          What we're building implements the first factor of 12-factor agents - converting natural language into structured tool calls.

          📖 **Learn more**: [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)

          ![Natural Language to Tool Calls](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/110-natural-language-tool-calls.png)

          First, let's set up BAML support in our notebook.
      - baml_setup: true
      - text: |
          Now let's create our agent that will use BAML to process user input.

          First, we'll define the core agent logic:
      - file: {src: ./walkthrough/01-agent.py}
      - text: |
          Next, we need to define the BAML function that our agent will use.

          ### Understanding BAML Syntax

          BAML files define:
          - **Classes**: Structured output schemas (like `DoneForNow` below)
          - **Functions**: AI-powered functions that take inputs and return structured outputs
          - **Tests**: Example inputs/outputs to validate your prompts

          This BAML file defines what our agent can do:
      - fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Now let's create our main function that accepts a message parameter:
      - file: {src: ./walkthrough/01-main.py}
      - text: |
          Let's test our agent! Try calling main() with different messages:
          - `main("What's the weather like?")`
          - `main("Tell me a joke")`
          - `main("How are you doing today?")`
      - run_main: {regenerate_baml: true, args: "Hello from the Python notebook!"}

  - name: calculator-tools
    title: "Chapter 2 - Add Calculator Tools"
    text: "Let's add some calculator tools to our agent."
    steps:
      - text: |
          Let's start by adding a tool definition for the calculator.

          These are simple structured outputs that we'll ask the model to
          return as a "next step" in the agentic loop.

          ## Factor 4: Tools Are Structured Outputs

          This chapter demonstrates that tools are just structured JSON outputs from the LLM - nothing more complex!

          📖 **Learn more**: [Factor 4: Tools Are Structured Outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)

          ![Tools Are Structured Outputs](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/140-tools-are-just-structured-outputs.png)

      - fetch_file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml}
      - text: |
          Now, let's update the agent's `DetermineNextStep` function to
          expose the calculator tools as potential next steps.

      - fetch_file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Now let's update our main function to show the tool call:
      - file: {src: ./walkthrough/02-main.py}
      - text: |
          Let's try out the calculator! The agent should recognize that you want to perform a calculation
          and return the appropriate tool call instead of just a message.
      - run_main: {regenerate_baml: true, args: "can you add 3 and 4"}

  - name: tool-loop
    title: "Chapter 3 - Process Tool Calls in a Loop"
    text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
    steps:
      - text: |
          In this chapter, we'll enhance our agent to process tool calls in a loop. This means:
          - The agent can call multiple tools in sequence
          - Each tool result is fed back to the agent
          - The agent continues until it has a final answer

          ## The Agent Loop Pattern

          We're implementing the core agent loop - where the AI makes decisions, executes tools, and continues until done.

          ![Agent Loop Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/027-agent-loop-animation.gif)

          ## Factor 5: Unify Execution State

          Notice how we're storing everything as events in our Thread - this is Factor 5 in action!

          📖 **Learn more**: [Factor 5: Unify Execution State](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)

          ![Unify State Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/155-unify-state-animation.gif)

          Let's update our agent to handle tool calls properly:
      - file: {src: ./walkthrough/03-agent.py}
      - text: |
          Now let's update our main function to use the new agent loop:
      - file: {src: ./walkthrough/03-main.py}
      - text: |
          Let's try it out! The agent should now call the tool and return the calculated result:
      - run_main: {regenerate_baml: true, args: "can you add 3 and 4"}
      - text: |
          You should see the agent:
          1. Recognize it needs to use the add tool
          2. Call the tool with the correct parameters
          3. Get the result (7)
          4. Generate a final response incorporating the result

          For more complex calculations, we need to handle all calculator operations. Let's add support for subtract, multiply, and divide:
      - file: {src: ./walkthrough/03b-agent.py}
      - text: |
          Now let's test subtraction:
      - run_main: {regenerate_baml: false, args: "can you subtract 3 from 4"}
      - text: |
          Test multiplication:
      - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4"}
      - text: |
          Finally, let's test a complex multi-step calculation:
      - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result"}
      - text: |
          Congratulations! You've taken your first step into hand-rolling an agent loop.

          Key concepts you've learned:
          - **Thread Management**: Tracking conversation history and tool calls
          - **Tool Execution**: Processing different tool types and returning results
          - **Agent Loop**: Continuing until the agent has a final answer

          From here, we'll start incorporating more intermediate and advanced concepts for 12-factor agents.

  - name: baml-tests
    title: "Chapter 4 - Add Tests to agent.baml"
    text: "Let's add some tests to our BAML agent."
    steps:
      - text: |
          In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly.

          ## Why Test BAML Functions?

          - **Catch regressions**: Ensure changes don't break existing behavior
          - **Document behavior**: Tests serve as living documentation
          - **Validate edge cases**: Test complex scenarios and conversation flows
          - **CI/CD integration**: Run tests automatically in your pipeline

          ## Factor 2: Own Your Prompts

          Testing is a key part of owning your prompts - you need to verify they work as expected!

          📖 **Learn more**: [Factor 2: Own Your Prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)

          ![Own Your Prompts](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/120-own-your-prompts.png)

          Let's start with a simple test that checks the agent's ability to handle basic interactions:
      - fetch_file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Run the tests to see them in action:
      - command: "!baml-cli test"
      - text: |
          Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output.

          ## BAML Assertion Syntax

          Assertions use the `@@assert` directive:
          ```
          @@assert(name, {{condition}})
          ```

          - `name`: A descriptive name for the assertion
          - `condition`: A boolean expression using `this` to access the output
      - fetch_file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Run the tests again to see assertions in action:
      - command: "!baml-cli test"
      - text: |
          Finally, let's add more complex test cases that test multi-step conversations.

          These tests simulate an entire conversation flow, including:
          - User input
          - Tool calls made by the agent
          - Tool responses
          - Final agent response
      - fetch_file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Run the comprehensive test suite:
      - command: "!baml-cli test"
      - text: |
          ## Key Testing Concepts

          1. **Test Structure**: Each test specifies functions, arguments, and assertions
          2. **Progressive Testing**: Start simple, then test complex scenarios
          3. **Conversation History**: Test how the agent handles multi-turn conversations
          4. **Tool Integration**: Verify the agent correctly uses tools in sequence

          With these tests in place, you can confidently modify your agent knowing that core functionality is protected by automated tests!

  - name: human-tools
    title: "Chapter 5 - Multiple Human Tools"
    text: |
      In this section, we'll add support for multiple tools that serve to contact humans.
    steps:
      - text: |
          So far, our agent only returns a final answer with "done_for_now". But what if the agent needs clarification?

          Let's add a new tool that allows the agent to request more information from the user.

          ## Why Human-in-the-Loop?

          - **Handle ambiguous inputs**: When user input is unclear or contains typos
          - **Request missing information**: When the agent needs more context
          - **Confirm sensitive operations**: Before performing important actions
          - **Interactive workflows**: Build conversational agents that engage users

          ## Factor 7: Contact Humans with Tools

          This is a critical pattern - treating human interaction as just another tool call!

          📖 **Learn more**: [Factor 7: Contact Humans with Tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)

          ![Contact Humans with Tools](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/170-contact-humans-with-tools.png)

          This enables **outer-loop agents** - agents that can pause execution and wait for human input:

          ![Outer Loop Agents](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/175-outer-loop-agents.png)

          First, let's update our BAML file to include a ClarificationRequest tool:
      - fetch_file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Now let's update our agent to handle clarification requests:
      - file: {src: ./walkthrough/05-agent.py}
      - text: |
          Finally, let's create a main function that handles human interaction:
      - file: {src: ./walkthrough/05-main.py}
      - text: |
          Let's test with an ambiguous input that should trigger a clarification request:
      - run_main: {regenerate_baml: true, args: "can you multiply 3 and FD*(#F&&"}
      - text: |
          You should see:
          1. The agent recognizes the input is unclear
          2. It asks for clarification
          3. In Colab, you'll be prompted to type a response
          4. In local testing, an auto-response is provided
          5. The agent continues with the clarified input

          ## Interactive Testing in Colab

          When running in Google Colab, the `input()` function will create an interactive text box where you can type your response. Try different clarifications to see how the agent adapts!

          ## Key Concepts

          - **Human Tools**: Special tool types that return control to the human
          - **Conversation Flow**: The agent can pause execution to get human input
          - **Context Preservation**: The full conversation history is maintained
          - **Flexible Handling**: Different behaviors for different environments

  - name: customize-prompt
    title: "Chapter 6 - Customize Your Prompt with Reasoning"
    text: |
      In this section, we'll explore how to customize the prompt of the agent with reasoning steps.

      This is core to [Factor 2: Own Your Prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md).
    steps:
      - text: |
          ## Why Add Reasoning to Prompts?

          Adding explicit reasoning steps to your prompts can significantly improve agent performance:

          - **Better decisions**: The model thinks through problems step-by-step
          - **Transparency**: You can see the model's thought process
          - **Fewer errors**: Structured thinking reduces mistakes
          - **Debugging**: Easier to identify where reasoning went wrong

          ## Factor 2: Own Your Prompts

          This chapter demonstrates taking full control of your prompts - they're first-class code!

          📖 **Learn more**: [Factor 2: Own Your Prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)

          Let's update our agent prompt to include a reasoning step:
      - fetch_file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
      - text: |
          Now let's test it with a simple calculation to see the reasoning in action:
      - run_main: {regenerate_baml: true, args: "can you multiply 3 and 4"}
      - text: |
          You should notice in the BAML logs (if enabled) that the model now includes reasoning steps before deciding what to do.

          ## Advanced Prompt Engineering

          You can enhance your prompts further by:
          - Adding specific reasoning templates for different tasks
          - Including examples of good reasoning
          - Structuring the reasoning with numbered steps
          - Adding checks for common mistakes

          The key is to guide the model's thinking process while still allowing flexibility.

  - name: context-window
    title: "Chapter 7 - Customize Your Context Window"
    text: |
      In this section, we'll explore how to customize the context window of the agent.

      This is core to [Factor 3: Own Your Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md).
    steps:
      - text: |
          ## Context Window Serialization

          How you format your conversation history can significantly impact:
          - **Token usage**: Some formats are more efficient
          - **Model understanding**: Clear structure helps the model
          - **Debugging**: Readable formats help development

          ## Factor 3: Own Your Context Window

          Context engineering is everything! This is one of the most important factors.

          📖 **Learn more**: [Factor 3: Own Your Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)

          Let's implement two serialization formats: pretty-printed JSON and XML.
      - file: {src: ./walkthrough/07-agent.py}
      - text: |
          Now let's create a main function that can switch between formats:
      - file: {src: ./walkthrough/07-main.py}
      - text: |
          Let's test with JSON format first:
      - run_main: {regenerate_baml: true, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: false}}
      - text: |
          Now let's try the same with XML format:
      - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: true}}
      - text: |
          ## XML vs JSON Trade-offs

          **XML Benefits**:
          - More token-efficient for nested data
          - Clear hierarchy with opening/closing tags
          - Better for long conversations

          **JSON Benefits**:
          - Familiar to most developers
          - Easy to parse and debug
          - Native to JavaScript/Python

          Choose based on your specific needs and token constraints!

          ## What's Next?

          In the remaining chapters (8-12), we'll build on these foundations to add:
          - **API endpoints** for serving your agent
          - **State persistence** with async operations
          - **Human approval workflows** (Factor 8: Own Your Control Flow)
          - **Email-based approvals** via HumanLayer
          - **Webhook integration** for launch/pause/resume patterns (Factor 6)

          Each step brings us closer to production-ready agents that can handle real-world complexity!

          📖 **Further Reading**:
          - [Factor 6: Launch/Pause/Resume](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md)
          - [Factor 8: Own Your Control Flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md)
          - [Factor 9: Compact Errors](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md)
          - [Factor 10: Small, Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md)
          - [Factor 11: Trigger From Anywhere](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md)


================================================
FILE: workshops/2025-07-16/walkthroughgen_py.py
================================================
#!/usr/bin/env python3
"""Convert walkthrough.yaml to Jupyter notebook for BAML Python tutorials."""

import yaml
import nbformat
from nbformat.v4 import new_notebook, new_markdown_cell, new_code_cell
import os
import sys
from pathlib import Path
import argparse

def create_baml_setup_cells(nb):
    """Append the four BAML bootstrap cells to ``nb.cells``.

    The cells are, in order: a markdown explanation, a ``pip install`` cell,
    a code cell defining the ``baml_generate()`` / ``get_baml_client()``
    helpers, and a ``baml-cli init`` cell.

    Args:
        nb: Notebook object exposing a mutable ``cells`` list.
    """
    intro_markdown = """### BAML Setup

Don't worry too much about this setup code - it will make sense later! For now, just know that:
- BAML is a tool for working with language models
- We need some special setup code to make it work nicely in Google Colab
- The `get_baml_client()` function will be used to interact with AI models"""

    # The helper cell is assembled from three pieces. The emitted text contains
    # a few lines made of four spaces only; they are spelled out explicitly so
    # that editors which trim trailing whitespace cannot alter the output.
    indent_only = "    \n"

    colab_detection = '''import subprocess
import os

# Try to import Google Colab userdata, but don't fail if not in Colab
try:
    from google.colab import userdata
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

'''

    generate_helper = '''def baml_generate():
    try:
        result = subprocess.run(
            ["baml-cli", "generate"],
            check=True,
            capture_output=True,
            text=True
        )
        if result.stdout:
            print("[baml-cli generate]\\n", result.stdout)
        if result.stderr:
            print("[baml-cli generate]\\n", result.stderr)
    except subprocess.CalledProcessError as e:
        msg = (
            f"`baml-cli generate` failed with exit code {e.returncode}\\n"
            f"--- STDOUT ---\\n{e.stdout}\\n"
            f"--- STDERR ---\\n{e.stderr}"
        )
        raise RuntimeError(msg) from None

'''

    client_helper = (
        '''def get_baml_client():
    """
    a bunch of fun jank to work around the google colab import cache
    """
    # Set API key from Colab secrets or environment
    if IN_COLAB:
        os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
    elif 'OPENAI_API_KEY' not in os.environ:
        print("Warning: OPENAI_API_KEY not set. Please set it in your environment.")
'''
        + indent_only
        + '''    baml_generate()
'''
        + indent_only
        + '''    # Force delete all baml_client modules from sys.modules
    import sys
    modules_to_delete = [key for key in sys.modules.keys() if key.startswith('baml_client')]
    for module in modules_to_delete:
        del sys.modules[module]
'''
        + indent_only
        + '''    # Now import fresh
    import baml_client
    return baml_client.sync_client.b
'''
    )

    # (cell factory, cell source) pairs in the order they appear in the notebook.
    planned_cells = (
        (new_markdown_cell, intro_markdown),
        (new_code_cell, "!pip install baml-py==0.202.0 pydantic"),
        (new_code_cell, colab_detection + generate_helper + client_helper),
        (new_code_cell, "!baml-cli init"),
    )
    for make_cell, source in planned_cells:
        nb.cells.append(make_cell(source))
    

def _py_string_literal(value):
    """Render ``value`` as a double-quoted Python string literal.

    Quotes, backslashes and control characters are escaped so the result can
    be pasted into generated source without breaking it. Text that needs no
    escaping comes out as ``"text"``.
    """
    import json  # local import: this module's header does not import json

    # JSON string syntax (with non-ASCII left as-is) is also valid Python
    # string-literal syntax, so json.dumps does the escaping for us.
    return json.dumps(str(value), ensure_ascii=False)


def _build_main_call(run_main):
    """Build the ``main(...)`` source line described by a ``run_main`` mapping."""
    call_parts = []

    # Optional single positional argument, always emitted as a string literal.
    args = run_main.get('args', '')
    if args:
        call_parts.append(_py_string_literal(args))

    # Keyword arguments: strings are quoted, everything else (bools, numbers,
    # ...) is emitted via its str() form, e.g. ``use_xml=False``.
    for key, value in (run_main.get('kwargs') or {}).items():
        if isinstance(value, str):
            call_parts.append(f'{key}={_py_string_literal(value)}')
        else:
            call_parts.append(f'{key}={value}')

    return f'main({", ".join(call_parts)})'


def process_step(nb, step, base_path, current_functions, section_name=None):
    """Translate one walkthrough step into notebook cells appended to ``nb.cells``.

    A step is a mapping; each recognised key that is present contributes cells,
    in this order:

    - ``text``: a markdown cell with the text.
    - ``baml_setup``: the BAML bootstrap cells (``create_baml_setup_cells``).
    - ``file``: for ``.py`` sources, a code cell with the file content preceded
      by a ``# <src>`` header; other file types are skipped.
    - ``fetch_file``: a ``curl`` cell downloading ``src`` from GitHub to ``dest``.
    - ``dir``: a ``mkdir -p`` cell.
    - ``command``: a shell cell (a leading ``!`` is added when missing).
    - ``run_main``: an optional ``baml_generate()`` cell followed by a
      ``main(...)`` call cell.

    Args:
        nb: Notebook object exposing a mutable ``cells`` list.
        step: Mapping describing the step.
        base_path: ``pathlib.Path`` used to resolve ``file`` sources.
        current_functions: Accepted for interface compatibility; not used here.
        section_name: Accepted for interface compatibility; not used here.
    """
    if 'text' in step:
        nb.cells.append(new_markdown_cell(step['text']))

    if 'baml_setup' in step:
        create_baml_setup_cells(nb)

    if 'file' in step:
        src = step['file']['src']
        # Only Python sources are inlined as code cells.
        if src.endswith('.py'):
            # "./"-prefixed sources resolve against the parent of base_path,
            # anything else against base_path itself.
            if src.startswith('./'):
                file_path = base_path.parent / src[2:]
            else:
                file_path = base_path / src

            if file_path.exists():
                # Read as UTF-8 explicitly so the result does not depend on
                # the platform's default encoding.
                with open(file_path, 'r', encoding='utf-8') as f:
                    content = f.read()
                # Add filename as comment at top
                code_with_header = f"# {src}\n{content}"
                nb.cells.append(new_code_cell(code_with_header))
            else:
                print(f"Warning: File not found: {file_path}")
                nb.cells.append(new_markdown_cell(f"**Error: File not found: {src}**"))

    if 'fetch_file' in step:
        # Fetch the file from GitHub at notebook run time and show its content.
        src = step['fetch_file']['src']
        dest = step['fetch_file']['dest']
        github_url = f"https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/{src}"
        command = f"!curl -fsSL -o {dest} {github_url} && cat {dest}"
        nb.cells.append(new_code_cell(command))

    if 'dir' in step:
        path = step['dir']['path']
        command = f"!mkdir -p {path}"
        nb.cells.append(new_code_cell(command))

    if 'command' in step:
        command = step['command'].strip()
        # Convert to notebook-style shell command
        if not command.startswith('!'):
            command = f"!{command}"
        nb.cells.append(new_code_cell(command))

    if 'run_main' in step:
        # A bare ``run_main:`` entry parses to None; treat it as "no options".
        run_main = step['run_main'] or {}
        if run_main.get('regenerate_baml', False):
            nb.cells.append(new_code_cell("baml_generate()"))

        nb.cells.append(new_code_cell(_build_main_call(run_main)))

def convert_walkthrough_to_notebook(yaml_path, output_path):
    """Convert a walkthrough YAML file into a Jupyter notebook file.

    The notebook starts with the walkthrough title (default ``Walkthrough``)
    and optional top-level ``text``. Each entry in ``sections`` then adds a
    ``##`` heading, its optional ``text``, and the cells produced by
    ``process_step`` for each of its ``steps``.

    Args:
        yaml_path: Path of the walkthrough YAML file to read.
        output_path: Path of the notebook file to write.
    """
    # Read as UTF-8 explicitly: walkthrough text may contain non-ASCII
    # characters (e.g. emoji) that a platform default encoding cannot decode.
    with open(yaml_path, 'r', encoding='utf-8') as f:
        # An empty document loads as None; treat it as an empty walkthrough.
        walkthrough = yaml.safe_load(f) or {}

    nb = new_notebook()

    # Title and optional description
    title = walkthrough.get('title', 'Walkthrough')
    nb.cells.append(new_markdown_cell(f"# {title}"))

    if 'text' in walkthrough:
        nb.cells.append(new_markdown_cell(walkthrough['text']))

    # File sources referenced by steps are resolved relative to the YAML's directory.
    base_path = Path(yaml_path).parent
    current_functions = {}

    # ``sections:`` / ``steps:`` with no value parse to None; treat as empty.
    for section in walkthrough.get('sections') or []:
        # Section heading falls back to the section name, then to "Section".
        section_title = section.get('title', section.get('name', 'Section'))
        section_name = section.get('name', '')
        nb.cells.append(new_markdown_cell(f"## {section_title}"))

        if 'text' in section:
            nb.cells.append(new_markdown_cell(section['text']))

        for step in section.get('steps') or []:
            process_step(nb, step, base_path, current_functions, section_name)

    # Write as UTF-8 explicitly so non-ASCII cell text cannot hit an encoding
    # error under a non-UTF-8 platform default.
    with open(output_path, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)

    print(f"Generated notebook: {output_path}")

def main():
    """Command-line entry point.

    Parses the YAML path (positional) and the output notebook path
    (``-o`` / ``--output``, default ``output.ipynb``) from the command line
    and runs the conversion.
    """
    cli = argparse.ArgumentParser(
        description='Convert walkthrough.yaml to Jupyter notebook',
    )
    cli.add_argument('yaml_file', help='Path to walkthrough.yaml')
    cli.add_argument(
        '-o',
        '--output',
        default='output.ipynb',
        help='Output notebook file',
    )

    options = cli.parse_args()
    convert_walkthrough_to_notebook(options.yaml_file, options.output)

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()

================================================
FILE: workshops/2025-07-16/workshop_final.ipynb
================================================
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "a55820ee",
   "metadata": {},
   "source": [
    "# Building the 12-factor agent template from scratch in Python"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ba52e30a",
   "metadata": {},
   "source": [
    "Steps to start from a bare Python repo and build up a 12-factor agent. This walkthrough will guide you through creating a Python agent that follows the 12-factor methodology with BAML."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "75b26c9b",
   "metadata": {},
   "source": [
    "## Chapter 0 - Hello World"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fa4b9e07",
   "metadata": {},
   "source": [
    "Let's start with a basic Python setup and a hello world program."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4e464227",
   "metadata": {},
   "source": [
    "This guide will walk you through building agents in Python with BAML.\n",
    "\n",
    "We'll start simple with a hello world program and gradually build up to a full agent.\n",
    "\n",
    "For this notebook, you'll need to have your OpenAI API key saved in Google Colab secrets.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "99dac1bb",
   "metadata": {},
   "source": [
    "Here's our simple hello world program:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9c6946fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/00-main.py\n",
    "def hello():\n",
    "    print('hello, world!')\n",
    "\n",
    "def main():\n",
    "    hello()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5523efac",
   "metadata": {},
   "source": [
    "Let's run it to verify it works:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6a437eb2",
   "metadata": {},
   "outputs": [],
   "source": [
    "main()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d9aa0df6",
   "metadata": {},
   "source": [
    "## Chapter 1 - CLI and Agent Loop"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "970c65da",
   "metadata": {},
   "source": [
    "Now let's add BAML and create our first agent with a CLI interface."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "976a0fca",
   "metadata": {},
   "source": [
    "In this chapter, we'll integrate BAML to create an AI agent that can respond to user input.\n",
    "\n",
    "## What is BAML?\n",
    "\n",
    "BAML (Boundary Markup Language) is a domain-specific language designed to help developers build reliable AI workflows and agents. Created by [BoundaryML](https://www.boundaryml.com/) (a Y Combinator W23 company), BAML adds the engineering to prompt engineering.\n",
    "\n",
    "### Why BAML?\n",
    "\n",
    "- **Type-safe outputs**: Get fully type-safe outputs from LLMs, even when streaming\n",
    "- **Language agnostic**: Works with Python, TypeScript, Ruby, Go, and more\n",
    "- **LLM agnostic**: Works with any LLM provider (OpenAI, Anthropic, etc.)\n",
    "- **Better performance**: State-of-the-art structured outputs that outperform even OpenAI's native function calling\n",
    "- **Developer-friendly**: Native VSCode extension with syntax highlighting, autocomplete, and interactive playground\n",
    "\n",
    "### Learn More\n",
    "\n",
    "- 📚 [Official Documentation](https://docs.boundaryml.com/home)\n",
    "- 💻 [GitHub Repository](https://github.com/BoundaryML/baml)\n",
    "- 🎯 [What is BAML?](https://docs.boundaryml.com/guide/introduction/what-is-baml)\n",
    "- 📖 [BAML Examples](https://github.com/BoundaryML/baml-examples)\n",
    "- 🏢 [Company Website](https://www.boundaryml.com/)\n",
    "- 📰 [Blog: AI Agents Need a New Syntax](https://www.boundaryml.com/blog/ai-agents-need-new-syntax)\n",
    "\n",
    "BAML turns prompt engineering into schema engineering, where you focus on defining the structure of your data rather than wrestling with prompts. This approach leads to more reliable and maintainable AI applications.\n",
    "\n",
    "### Note on Developer Experience\n",
    "\n",
    "BAML works much better in VS Code with their official extension, which provides syntax highlighting, autocomplete, inline testing, and an interactive playground. However, for this notebook tutorial, we'll work with BAML files directly without the enhanced IDE features.\n",
    "\n",
    "First, let's set up BAML support in our notebook.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ba1f7191",
   "metadata": {},
   "source": [
    "### BAML Setup\n",
    "\n",
    "Don't worry too much about this setup code - it will make sense later! For now, just know that:\n",
    "- BAML is a tool for working with language models\n",
    "- We need some special setup code to make it work nicely in Google Colab\n",
    "- The `get_baml_client()` function will be used to interact with AI models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9910f8a3",
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install baml-py==0.202.0 pydantic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a4ad6e77",
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "import os\n",
    "\n",
    "# Try to import Google Colab userdata, but don't fail if not in Colab\n",
    "try:\n",
    "    from google.colab import userdata\n",
    "    IN_COLAB = True\n",
    "except ImportError:\n",
    "    IN_COLAB = False\n",
    "\n",
    "def baml_generate():\n",
    "    try:\n",
    "        result = subprocess.run(\n",
    "            [\"baml-cli\", \"generate\"],\n",
    "            check=True,\n",
    "            capture_output=True,\n",
    "            text=True\n",
    "        )\n",
    "        if result.stdout:\n",
    "            print(\"[baml-cli generate]\\n\", result.stdout)\n",
    "        if result.stderr:\n",
    "            print(\"[baml-cli generate]\\n\", result.stderr)\n",
    "    except subprocess.CalledProcessError as e:\n",
    "        msg = (\n",
    "            f\"`baml-cli generate` failed with exit code {e.returncode}\\n\"\n",
    "            f\"--- STDOUT ---\\n{e.stdout}\\n\"\n",
    "            f\"--- STDERR ---\\n{e.stderr}\"\n",
    "        )\n",
    "        raise RuntimeError(msg) from None\n",
    "\n",
    "def get_baml_client():\n",
    "    \"\"\"\n",
    "    a bunch of fun jank to work around the google colab import cache\n",
    "    \"\"\"\n",
    "    # Set API key from Colab secrets or environment\n",
    "    if IN_COLAB:\n",
    "        os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
    "    elif 'OPENAI_API_KEY' not in os.environ:\n",
    "        print(\"Warning: OPENAI_API_KEY not set. Please set it in your environment.\")\n",
    "    \n",
    "    baml_generate()\n",
    "    \n",
    "    # Force delete all baml_client modules from sys.modules\n",
    "    import sys\n",
    "    modules_to_delete = [key for key in sys.modules.keys() if key.startswith('baml_client')]\n",
    "    for module in modules_to_delete:\n",
    "        del sys.modules[module]\n",
    "    \n",
    "    # Now import fresh\n",
    "    import baml_client\n",
    "    return baml_client.sync_client.b\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b99ba982",
   "metadata": {},
   "outputs": [],
   "source": [
    "!baml-cli init"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ee716f3a",
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls baml_src"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "894474da",
   "metadata": {},
   "source": [
    "Now let's create our agent that will use BAML to process user input.\n",
    "\n",
    "First, we'll define the core agent logic:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dbf9d929",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/01-agent.py\n",
    "import json\n",
    "from typing import Dict, Any, List\n",
    "\n",
    "# tool call or a respond to human tool\n",
    "AgentResponse = Any  # This will be the return type from b.DetermineNextStep\n",
    "\n",
    "class Event:\n",
    "    def __init__(self, type: str, data: Any):\n",
    "        self.type = type\n",
    "        self.data = data\n",
    "\n",
    "class Thread:\n",
    "    def __init__(self, events: List[Dict[str, Any]]):\n",
    "        self.events = events\n",
    "    \n",
    "    def serialize_for_llm(self):\n",
    "        # can change this to whatever custom serialization you want to do, XML, etc\n",
    "        # e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105\n",
    "        return json.dumps(self.events)\n",
    "\n",
    "# right now this just runs one turn with the LLM, but\n",
    "# we'll update this function to handle all the agent logic\n",
    "def agent_loop(thread: Thread) -> AgentResponse:\n",
    "    b = get_baml_client()  # This will be defined by the BAML setup\n",
    "    next_step = b.DetermineNextStep(thread.serialize_for_llm())\n",
    "    return next_step"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b9421cd4",
   "metadata": {},
   "source": [
    "Next, we need to define the BAML function that our agent will use.\n",
    "\n",
    "### Understanding BAML Syntax\n",
    "\n",
    "BAML files define:\n",
    "- **Classes**: Structured output schemas (like `DoneForNow` below)\n",
    "- **Functions**: AI-powered functions that take inputs and return structured outputs\n",
    "- **Tests**: Example inputs/outputs to validate your prompts\n",
    "\n",
    "This BAML file defines what our agent can do:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "58d8bda5",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/01-agent.baml && cat baml_src/agent.baml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1edc5279",
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls baml_src"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ee489cc1",
   "metadata": {},
   "source": [
    "Now let's create our main function that accepts a message parameter:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4fea69e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/01-main.py\n",
    "def main(message=\"hello from the notebook!\"):\n",
    "    # Create a new thread with the user's message as the initial event\n",
    "    thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n",
    "    \n",
    "    # Run the agent loop with the thread\n",
    "    result = agent_loop(thread)\n",
    "    print(result)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fe3fd9c7",
   "metadata": {},
   "source": [
    "Let's test our agent! Try calling main() with different messages:\n",
    "- `main(\"What's the weather like?\")`\n",
    "- `main(\"Tell me a joke\")`\n",
    "- `main(\"How are you doing today?\")`\n",
    "\n",
    "In this case, we'll call the `baml_generate()` function to\n",
    "generate the Pydantic and Python bindings from our\n",
    "BAML source. In later chapters we'll skip this step, because it\n",
    "is done automatically by the `get_baml_client()` function.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7fc1ee38",
   "metadata": {},
   "outputs": [],
   "source": [
    "baml_generate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8756df71",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"Hello from the Python notebook!\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9b5ca88c",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "e79f4d84",
   "metadata": {},
   "source": [
    "## Chapter 2 - Add Calculator Tools"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4659d5ef",
   "metadata": {},
   "source": [
    "Let's add some calculator tools to our agent."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "73df701a",
   "metadata": {},
   "source": [
    "Let's start by adding a tool definition for the calculator.\n",
    "\n",
    "These are simple structured outputs that we'll ask the model to\n",
    "return as a \"next step\" in the agentic loop.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c538cd53",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/tool_calculator.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/02-tool_calculator.baml && cat baml_src/tool_calculator.baml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1df07ff3",
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls baml_src"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1ffe3854",
   "metadata": {},
   "source": [
    "Now, let's update the agent's DetermineNextStep method to\n",
    "expose the calculator tools as potential next steps.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6f9ee99",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/02-agent.baml && cat baml_src/agent.baml"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "147bd22c",
   "metadata": {},
   "source": [
    "Now let's update our main function to show the tool call:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f8f99089",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/02-main.py\n",
    "def main(message=\"hello from the notebook!\"):\n",
    "    # Create a new thread with the user's message\n",
    "    thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n",
    "    \n",
    "    # Get BAML client\n",
    "    b = get_baml_client()\n",
    "    \n",
    "    # Get the next step from the agent - just show the tool call\n",
    "    next_step = b.DetermineNextStep(thread.serialize_for_llm())\n",
    "    \n",
    "    # Print the raw response to show the tool call\n",
    "    print(next_step)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ffb6c213",
   "metadata": {},
   "source": [
    "Let's try out the calculator! The agent should recognize that you want to perform a calculation\n",
    "and return the appropriate tool call instead of just a message.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7afaa326",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"can you add 3 and 4\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "599d21dd",
   "metadata": {},
   "source": [
    "## Chapter 3 - Process Tool Calls in a Loop"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d80e3f9f",
   "metadata": {},
   "source": [
    "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "427fbc77",
   "metadata": {},
   "source": [
    "In this chapter, we'll enhance our agent to process tool calls in a loop. This means:\n",
    "- The agent can call multiple tools in sequence\n",
    "- Each tool result is fed back to the agent\n",
    "- The agent continues until it has a final answer\n",
    "\n",
    "Let's update our agent to handle tool calls properly:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac8ae567",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/03-agent.py\n",
    "import json\n",
    "from typing import Dict, Any, List\n",
    "\n",
    "class Thread:\n",
    "    def __init__(self, events: List[Dict[str, Any]]):\n",
    "        self.events = events\n",
    "    \n",
    "    def serialize_for_llm(self):\n",
    "        # can change this to whatever custom serialization you want to do, XML, etc\n",
    "        # e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105\n",
    "        return json.dumps(self.events)\n",
    "\n",
    "\n",
    "def agent_loop(thread: Thread) -> str:\n",
    "    b = get_baml_client()\n",
    "    \n",
    "    while True:\n",
    "        next_step = b.DetermineNextStep(thread.serialize_for_llm())\n",
    "        print(\"nextStep\", next_step)\n",
    "        \n",
    "        if next_step.intent == \"done_for_now\":\n",
    "            # response to human, return the next step object\n",
    "            return next_step.message\n",
    "        elif next_step.intent == \"add\":\n",
    "            thread.events.append({\n",
    "                \"type\": \"tool_call\",\n",
    "                \"data\": next_step.__dict__\n",
    "            })\n",
    "            result = next_step.a + next_step.b\n",
    "            print(\"tool_response\", result)\n",
    "            thread.events.append({\n",
    "                \"type\": \"tool_response\",\n",
    "                \"data\": result\n",
    "            })\n",
    "            continue\n",
    "        else:\n",
    "            raise ValueError(f\"Unknown intent: {next_step.intent}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e875f4c2",
   "metadata": {},
   "source": [
    "Now let's update our main function to use the new agent loop:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2aead128",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/03-main.py\n",
    "def main(message=\"hello from the notebook!\"):\n",
    "    # Create a new thread with the user's message\n",
    "    thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n",
    "    \n",
    "    # Run the agent loop with full tool handling\n",
    "    result = agent_loop(thread)\n",
    "    \n",
    "    # Print the final response\n",
    "    print(f\"\\nFinal response: {result}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a29bf07d",
   "metadata": {},
   "source": [
    "Let's try it out! The agent should now call the tool and return the calculated result:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c6c6a0ca",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"can you add 3 and 4\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "4c20a7d5",
   "metadata": {},
   "source": [
    "You should see the agent:\n",
    "1. Recognize it needs to use the add tool\n",
    "2. Call the tool with the correct parameters\n",
    "3. Get the result (7)\n",
    "4. Generate a final response incorporating the result\n",
    "\n",
    "For more complex calculations, we need to handle all calculator operations. Let's add support for subtract, multiply, and divide:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "561c0b54",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/03b-agent.py\n",
    "import json\n",
    "from typing import Dict, Any, List, Union\n",
    "\n",
    "class Thread:\n",
    "    def __init__(self, events: List[Dict[str, Any]]):\n",
    "        self.events = events\n",
    "    \n",
    "    def serialize_for_llm(self):\n",
    "        # can change this to whatever custom serialization you want to do, XML, etc\n",
    "        # e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105\n",
    "        return json.dumps(self.events)\n",
    "\n",
    "def handle_next_step(next_step, thread: Thread) -> Thread:\n",
    "    result: float\n",
    "    \n",
    "    if next_step.intent == \"add\":\n",
    "        result = next_step.a + next_step.b\n",
    "        print(\"tool_response\", result)\n",
    "        thread.events.append({\n",
    "            \"type\": \"tool_response\",\n",
    "            \"data\": result\n",
    "        })\n",
    "        return thread\n",
    "    elif next_step.intent == \"subtract\":\n",
    "        result = next_step.a - next_step.b\n",
    "        print(\"tool_response\", result)\n",
    "        thread.events.append({\n",
    "            \"type\": \"tool_response\",\n",
    "            \"data\": result\n",
    "        })\n",
    "        return thread\n",
    "    elif next_step.intent == \"multiply\":\n",
    "        result = next_step.a * next_step.b\n",
    "        print(\"tool_response\", result)\n",
    "        thread.events.append({\n",
    "            \"type\": \"tool_response\",\n",
    "            \"data\": result\n",
    "        })\n",
    "        return thread\n",
    "    elif next_step.intent == \"divide\":\n",
    "        result = next_step.a / next_step.b\n",
    "        print(\"tool_response\", result)\n",
    "        thread.events.append({\n",
    "            \"type\": \"tool_response\",\n",
    "            \"data\": result\n",
    "        })\n",
    "        return thread\n",
    "\n",
    "def agent_loop(thread: Thread) -> str:\n",
    "    b = get_baml_client()\n",
    "    \n",
    "    while True:\n",
    "        next_step = b.DetermineNextStep(thread.serialize_for_llm())\n",
    "        print(\"nextStep\", next_step)\n",
    "        \n",
    "        thread.events.append({\n",
    "            \"type\": \"tool_call\",\n",
    "            \"data\": next_step.__dict__\n",
    "        })\n",
    "        \n",
    "        if next_step.intent == \"done_for_now\":\n",
    "            # response to human, return the next step object\n",
    "            return next_step.message\n",
    "        elif next_step.intent in [\"add\", \"subtract\", \"multiply\", \"divide\"]:\n",
    "            thread = handle_next_step(next_step, thread)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7c612b06",
   "metadata": {},
   "source": [
    "Now let's test subtraction:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4be4af22",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"can you subtract 3 from 4\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1da0ad58",
   "metadata": {},
   "source": [
    "Test multiplication:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49d5e040",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"can you multiply 3 and 4\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5a27929",
   "metadata": {},
   "source": [
    "Finally, let's test a complex multi-step calculation:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "431414aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "99ab35d5",
   "metadata": {},
   "source": [
    "Congratulations! You've taken your first step into hand-rolling an agent loop.\n",
    "\n",
    "Key concepts you've learned:\n",
    "- **Thread Management**: Tracking conversation history and tool calls\n",
    "- **Tool Execution**: Processing different tool types and returning results\n",
    "- **Agent Loop**: Continuing until the agent has a final answer\n",
    "\n",
    "From here, we'll start incorporating more intermediate and advanced concepts for 12-factor agents.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "9ba4e319",
   "metadata": {},
   "source": [
    "## Chapter 4 - Add Tests to agent.baml"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6bf77db0",
   "metadata": {},
   "source": [
    "Let's add some tests to our BAML agent."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c6f0d38a",
   "metadata": {},
   "source": [
    "In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly.\n",
    "\n",
    "## Why Test BAML Functions?\n",
    "\n",
    "- **Catch regressions**: Ensure changes don't break existing behavior\n",
    "- **Document behavior**: Tests serve as living documentation\n",
    "- **Validate edge cases**: Test complex scenarios and conversation flows\n",
    "- **CI/CD integration**: Run tests automatically in your pipeline\n",
    "\n",
    "Let's start with a simple test that checks the agent's ability to handle basic interactions:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd0ae03f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/04-agent.baml && cat baml_src/agent.baml"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5bf05182",
   "metadata": {},
   "source": [
    "Run the tests to see them in action:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "30bbcac5",
   "metadata": {},
   "outputs": [],
   "source": [
    "!baml-cli test"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2cbbf5db",
   "metadata": {},
   "source": [
    "Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output.\n",
    "\n",
    "## BAML Assertion Syntax\n",
    "\n",
    "Assertions use the `@@assert` directive:\n",
    "```\n",
    "@@assert(name, {{condition}})\n",
    "```\n",
    "\n",
    "- `name`: A descriptive name for the assertion\n",
    "- `condition`: A boolean expression that uses `this` to refer to the function's output, e.g. `{{this.intent == \"multiply\"}}`\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dbbc5283",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/04b-agent.baml && cat baml_src/agent.baml"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ecf9cb68",
   "metadata": {},
   "source": [
    "Run the tests again to see assertions in action:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d0611f3",
   "metadata": {},
   "outputs": [],
   "source": [
    "!baml-cli test"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8789e20e",
   "metadata": {},
   "source": [
    "Finally, let's add more complex test cases that test multi-step conversations.\n",
    "\n",
    "These tests simulate an entire conversation flow, including:\n",
    "- User input\n",
    "- Tool calls made by the agent\n",
    "- Tool responses\n",
    "- Final agent response\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "abf5be5b",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/04c-agent.baml && cat baml_src/agent.baml"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8ce0f9de",
   "metadata": {},
   "source": [
    "Run the comprehensive test suite:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4afe82b8",
   "metadata": {},
   "outputs": [],
   "source": [
    "!baml-cli test"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5d0ba42b",
   "metadata": {},
   "source": [
    "## Key Testing Concepts\n",
    "\n",
    "1. **Test Structure**: Each test specifies the function(s) under test, the input arguments, and assertions on the output\n",
    "2. **Progressive Testing**: Start simple, then test complex scenarios\n",
    "3. **Conversation History**: Test how the agent handles multi-turn conversations\n",
    "4. **Tool Integration**: Verify the agent correctly uses tools in sequence\n",
    "\n",
    "With these tests in place, you can confidently modify your agent knowing that core functionality is protected by automated tests!\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bf15b77e",
   "metadata": {},
   "source": [
    "## Chapter 5 - Multiple Human Tools"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e69dbeca",
   "metadata": {},
   "source": [
    "In this section, we'll add support for multiple tools that the agent can use to contact humans.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f3e29142",
   "metadata": {},
   "source": [
    "So far, our agent only returns a final answer with \"done_for_now\". But what if the agent needs clarification?\n",
    "\n",
    "Let's add a new tool that allows the agent to request more information from the user.\n",
    "\n",
    "## Why Human-in-the-Loop?\n",
    "\n",
    "- **Handle ambiguous inputs**: When user input is unclear or contains typos\n",
    "- **Request missing information**: When the agent needs more context\n",
    "- **Confirm sensitive operations**: Before performing important actions\n",
    "- **Interactive workflows**: Build conversational agents that engage users\n",
    "\n",
    "First, let's update our BAML file to include a ClarificationRequest tool:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9b42b75e",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/05-agent.baml && cat baml_src/agent.baml"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7be2af7d",
   "metadata": {},
   "source": [
    "Now let's update our agent to handle clarification requests:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "21a3f526",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/05-agent.py\n",
    "# Agent implementation with clarification support\n",
    "import json\n",
    "\n",
    "def agent_loop(thread, clarification_handler, max_iterations=3):\n",
    "    \"\"\"Run the agent loop until we get a final answer (max 3 iterations).\"\"\"\n",
    "    iteration_count = 0\n",
    "    while iteration_count < max_iterations:\n",
    "        iteration_count += 1\n",
    "        print(f\"🔄 Agent loop iteration {iteration_count}/{max_iterations}\")\n",
    "        \n",
    "        # Get the client\n",
    "        baml_client = get_baml_client()\n",
    "        \n",
    "        # Serialize the thread\n",
    "        thread_json = json.dumps(thread.events, indent=2)\n",
    "        \n",
    "        # Call the agent\n",
    "        result = baml_client.DetermineNextStep(thread_json)\n",
    "        \n",
    "        # Check what type of result we got based on intent\n",
    "        if hasattr(result, 'intent'):\n",
    "            if result.intent == 'done_for_now':\n",
    "                return result.message\n",
    "            elif result.intent == 'request_more_information':\n",
    "                # Get clarification from the human\n",
    "                clarification = clarification_handler(result.message)\n",
    "                \n",
    "                # Add the clarification to the thread\n",
    "                thread.events.append({\n",
    "                    \"type\": \"clarification_request\",\n",
    "                    \"data\": result.message\n",
    "                })\n",
    "                thread.events.append({\n",
    "                    \"type\": \"clarification_response\",\n",
    "                    \"data\": clarification\n",
    "                })\n",
    "                \n",
    "                # Continue the loop with the clarification\n",
    "            elif result.intent in ['add', 'subtract', 'multiply', 'divide']:\n",
    "                # Execute the appropriate tool based on intent\n",
    "                if result.intent == 'add':\n",
    "                    result_value = result.a + result.b\n",
    "                    operation = f\"add({result.a}, {result.b})\"\n",
    "                elif result.intent == 'subtract':\n",
    "                    result_value = result.a - result.b\n",
    "                    operation = f\"subtract({result.a}, {result.b})\"\n",
    "                elif result.intent == 'multiply':\n",
    "                    result_value = result.a * result.b\n",
    "                    operation = f\"multiply({result.a}, {result.b})\"\n",
    "                elif result.intent == 'divide':\n",
    "                    if result.b == 0:\n",
    "                        result_value = \"Error: Division by zero\"\n",
    "                    else:\n",
    "                        result_value = result.a / result.b\n",
    "                    operation = f\"divide({result.a}, {result.b})\"\n",
    "                \n",
    "                print(f\"🔧 Calling tool: {operation} = {result_value}\")\n",
    "                \n",
    "                # Add the tool call and result to the thread\n",
    "                thread.events.append({\n",
    "                    \"type\": \"tool_call\",\n",
    "                    \"data\": {\n",
    "                        \"tool\": \"calculator\",\n",
    "                        \"operation\": operation,\n",
    "                        \"result\": result_value\n",
    "                    }\n",
    "                })\n",
    "        else:\n",
    "            return \"Error: Unexpected result type\"\n",
    "    \n",
    "    # If we've reached max iterations without a final answer\n",
    "    return f\"Agent reached maximum iterations ({max_iterations}) without completing the task.\"\n",
    "\n",
    "class Thread:\n",
    "    \"\"\"Simple thread to track conversation history.\"\"\"\n",
    "    def __init__(self, events):\n",
    "        self.events = events"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5f017c77",
   "metadata": {},
   "source": [
    "Finally, let's create a main function that handles human interaction:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e648be92",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/05-main.py\n",
    "def get_human_input(prompt):\n",
    "    \"\"\"Get input from human, handling both Colab and local environments.\"\"\"\n",
    "    print(f\"\\n🤔 {prompt}\")\n",
    "    \n",
    "    if IN_COLAB:\n",
    "        # In Colab, use actual input\n",
    "        response = input(\"Your response: \")\n",
    "    else:\n",
    "        # In local testing, return a fixed response\n",
    "        response = \"I meant to multiply 3 and 4\"\n",
    "        print(f\"📝 [Auto-response for testing]: {response}\")\n",
    "    \n",
    "    return response\n",
    "\n",
    "def main(message=\"hello from the notebook!\"):\n",
    "    # Function to handle clarification requests\n",
    "    def handle_clarification(question):\n",
    "        return get_human_input(f\"The agent needs clarification: {question}\")\n",
    "    \n",
    "    # Create a new thread with the user's message\n",
    "    thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n",
    "    \n",
    "    print(f\"🚀 Starting agent with message: '{message}'\")\n",
    "    \n",
    "    # Run the agent loop\n",
    "    result = agent_loop(thread, handle_clarification)\n",
    "    \n",
    "    # Print the final response\n",
    "    print(f\"\\n✅ Final response: {result}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2f4b962e",
   "metadata": {},
   "source": [
    "Let's test with an ambiguous input that should trigger a clarification request:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "948684f2",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"can you multiply 3 and FD*(#F&&\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "54b7d0d4",
   "metadata": {},
   "source": [
    "You should see:\n",
    "1. The agent recognizes the input is unclear\n",
    "2. It asks for clarification\n",
    "3. In Colab, you'll be prompted to type a response\n",
    "4. In local testing, an auto-response is provided\n",
    "5. The agent continues with the clarified input\n",
    "\n",
    "## Interactive Testing in Colab\n",
    "\n",
    "When running in Google Colab, the `input()` function will create an interactive text box where you can type your response. Try different clarifications to see how the agent adapts!\n",
    "\n",
    "## Key Concepts\n",
    "\n",
    "- **Human Tools**: Special tool types that return control to the human\n",
    "- **Conversation Flow**: The agent can pause execution to get human input\n",
    "- **Context Preservation**: The full conversation history is maintained\n",
    "- **Flexible Handling**: Different behaviors for different environments\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "253d3f6f",
   "metadata": {},
   "source": [
    "## Chapter 6 - Customize Your Prompt with Reasoning"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "87dc996a",
   "metadata": {},
   "source": [
    "In this section, we'll explore how to customize the prompt of the agent with reasoning steps.\n",
    "\n",
    "This is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7694a842",
   "metadata": {},
   "source": [
    "## Why Add Reasoning to Prompts?\n",
    "\n",
    "Adding explicit reasoning steps to your prompts can significantly improve agent performance:\n",
    "\n",
    "- **Better decisions**: The model thinks through problems step-by-step\n",
    "- **Transparency**: You can see the model's thought process\n",
    "- **Fewer errors**: Structured thinking reduces mistakes\n",
    "- **Debugging**: Easier to identify where reasoning went wrong\n",
    "\n",
    "Let's update our agent prompt to include a reasoning step:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b38033a",
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/06-agent.baml && cat baml_src/agent.baml"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "30aff7de",
   "metadata": {},
   "source": [
    "Now let's test it with a simple calculation to see the reasoning in action:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "515f9755",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"can you multiply 3 and 4\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "2f69536c",
   "metadata": {},
   "source": [
    "The model uses explicit reasoning steps to think through the problem before making a decision.\n",
    "\n",
    "## Advanced Prompt Engineering\n",
    "\n",
    "You can enhance your prompts further by:\n",
    "- Adding specific reasoning templates for different tasks\n",
    "- Including examples of good reasoning\n",
    "- Structuring the reasoning with numbered steps\n",
    "- Adding checks for common mistakes\n",
    "\n",
    "The key is to guide the model's thinking process while still allowing flexibility.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8274aff0",
   "metadata": {},
   "source": [
    "## Chapter 7 - Customize Your Context Window"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f930c899",
   "metadata": {},
   "source": [
    "In this section, we'll explore how to customize the context window of the agent.\n",
    "\n",
    "This is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1d4235ed",
   "metadata": {},
   "source": [
    "## Context Window Serialization\n",
    "\n",
    "How you format your conversation history can significantly impact:\n",
    "- **Token usage**: Some formats are more efficient\n",
    "- **Model understanding**: Clear structure helps the model\n",
    "- **Debugging**: Readable formats help development\n",
    "\n",
    "Let's implement two serialization formats: pretty-printed JSON and XML.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dccf9a9f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/07-agent.py\n",
    "# Agent with configurable serialization formats\n",
    "import json\n",
    "\n",
    "class Thread:\n",
    "    \"\"\"Thread that can serialize to different formats.\"\"\"\n",
    "    def __init__(self, events):\n",
    "        self.events = events\n",
    "    \n",
    "    def serialize_as_json(self):\n",
    "        \"\"\"Serialize thread events to pretty-printed JSON.\"\"\"\n",
    "        return json.dumps(self.events, indent=2)\n",
    "    \n",
    "    def serialize_as_xml(self):\n",
    "        \"\"\"Serialize thread events to XML format for better token efficiency.\"\"\"\n",
    "        import yaml\n",
    "        xml_parts = [\"<thread>\"]\n",
    "        \n",
    "        for event in self.events:\n",
    "            event_type = event['type']\n",
    "            event_data = event['data']\n",
    "            \n",
    "            if event_type == 'user_input':\n",
    "                xml_parts.append(f'  <user_input>{event_data}</user_input>')\n",
    "            elif event_type == 'tool_call':\n",
    "                # Use YAML for tool call args - more compact than nested XML\n",
    "                yaml_content = yaml.dump(event_data, default_flow_style=False).strip()\n",
    "                xml_parts.append(f'  <{event_data[\"tool\"]}>')\n",
    "                xml_parts.append('    ' + '\\n    '.join(yaml_content.split('\\n')))\n",
    "                xml_parts.append(f'  </{event_data[\"tool\"]}>')\n",
    "            elif event_type == 'clarification_request':\n",
    "                xml_parts.append(f'  <clarification_request>{event_data}</clarification_request>')\n",
    "            elif event_type == 'clarification_response':\n",
    "                xml_parts.append(f'  <clarification_response>{event_data}</clarification_response>')\n",
    "        \n",
    "        xml_parts.append(\"</thread>\")\n",
    "        return \"\\n\".join(xml_parts)\n",
    "\n",
    "def agent_loop(thread, clarification_handler, use_xml=True):\n",
    "    \"\"\"Run the agent loop with configurable serialization.\"\"\"\n",
    "    while True:\n",
    "        # Get the client\n",
    "        baml_client = get_baml_client()\n",
    "        \n",
    "        # Serialize the thread based on format preference\n",
    "        if use_xml:\n",
    "            thread_str = thread.serialize_as_xml()\n",
    "            print(f\"📄 Using XML serialization ({len(thread_str)} chars)\")\n",
    "        else:\n",
    "            thread_str = thread.serialize_as_json()\n",
    "            print(f\"📄 Using JSON serialization ({len(thread_str)} chars)\")\n",
    "        \n",
    "        # Call the agent\n",
    "        result = baml_client.DetermineNextStep(thread_str)\n",
    "        \n",
    "        # Check what type of result we got based on intent\n",
    "        if hasattr(result, 'intent'):\n",
    "            if result.intent == 'done_for_now':\n",
    "                return result.message\n",
    "            elif result.intent == 'request_more_information':\n",
    "                # Get clarification from the human\n",
    "                clarification = clarification_handler(result.message)\n",
    "                \n",
    "                # Add the clarification to the thread\n",
    "                thread.events.append({\n",
    "                    \"type\": \"clarification_request\",\n",
    "                    \"data\": result.message\n",
    "                })\n",
    "                thread.events.append({\n",
    "                    \"type\": \"clarification_response\",\n",
    "                    \"data\": clarification\n",
    "                })\n",
    "                \n",
    "                # Continue the loop with the clarification\n",
    "            elif result.intent in ['add', 'subtract', 'multiply', 'divide']:\n",
    "                # Execute the appropriate tool based on intent\n",
    "                if result.intent == 'add':\n",
    "                    result_value = result.a + result.b\n",
    "                    operation = f\"add({result.a}, {result.b})\"\n",
    "                elif result.intent == 'subtract':\n",
    "                    result_value = result.a - result.b\n",
    "                    operation = f\"subtract({result.a}, {result.b})\"\n",
    "                elif result.intent == 'multiply':\n",
    "                    result_value = result.a * result.b\n",
    "                    operation = f\"multiply({result.a}, {result.b})\"\n",
    "                elif result.intent == 'divide':\n",
    "                    if result.b == 0:\n",
    "                        result_value = \"Error: Division by zero\"\n",
    "                    else:\n",
    "                        result_value = result.a / result.b\n",
    "                    operation = f\"divide({result.a}, {result.b})\"\n",
    "                \n",
    "                print(f\"🔧 Calling tool: {operation} = {result_value}\")\n",
    "                \n",
    "                # Add the tool call and result to the thread\n",
    "                thread.events.append({\n",
    "                    \"type\": \"tool_call\",\n",
    "                    \"data\": {\n",
    "                        \"tool\": \"calculator\",\n",
    "                        \"operation\": operation,\n",
    "                        \"result\": result_value\n",
    "                    }\n",
    "                })\n",
    "        else:\n",
    "            return \"Error: Unexpected result type\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e02d1361",
   "metadata": {},
   "source": [
    "Now let's create a main function that can switch between formats:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "03c71da7",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ./walkthrough/07-main.py\n",
    "def main(message=\"hello from the notebook!\", use_xml=True):\n",
    "    # Function to handle clarification requests\n",
    "    def handle_clarification(question):\n",
    "        return get_human_input(f\"The agent needs clarification: {question}\")\n",
    "    \n",
    "    # Create a new thread with the user's message\n",
    "    thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n",
    "    \n",
    "    print(f\"🚀 Starting agent with message: '{message}'\")\n",
    "    print(f\"📋 Using {'XML' if use_xml else 'JSON'} format for thread serialization\")\n",
    "    \n",
    "    # Run the agent loop with XML serialization\n",
    "    result = agent_loop(thread, handle_clarification, use_xml=use_xml)\n",
    "    \n",
    "    # Print the final response\n",
    "    print(f\"\\n✅ Final response: {result}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1d1718ab",
   "metadata": {},
   "source": [
    "Let's test with JSON format first:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "41b41a22",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"can you multiply 3 and 4, then divide the result by 2\", use_xml=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d1bb4844",
   "metadata": {},
   "source": [
    "Now let's try the same with XML format:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2ab2a144",
   "metadata": {},
   "outputs": [],
   "source": [
    "main(\"can you multiply 3 and 4, then divide the result by 2\", use_xml=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8883acac",
   "metadata": {},
   "source": [
    "## XML vs JSON Trade-offs\n",
    "\n",
    "**XML Benefits**:\n",
    "- More token-efficient for nested data\n",
    "- Clear hierarchy with opening/closing tags\n",
    "- Better for long conversations\n",
    "\n",
    "**JSON Benefits**:\n",
    "- Familiar to most developers\n",
    "- Easy to parse and debug\n",
    "- Native to JavaScript/Python\n",
    "\n",
    "Choose based on your specific needs and token constraints!\n"
   ]
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}