Repository: humanlayer/12-factor-agents
Branch: main
Commit: d20c728368bf
Files: 429
Total size: 1.0 MB
Directory structure:
gitextract_8e8ds6gd/
├── .gitignore
├── CLAUDE.md
├── LICENSE
├── Makefile
├── README.md
├── content/
│ ├── appendix-13-pre-fetch.md
│ ├── brief-history-of-software.md
│ ├── factor-01-natural-language-to-tool-calls.md
│ ├── factor-02-own-your-prompts.md
│ ├── factor-03-own-your-context-window.md
│ ├── factor-04-tools-are-structured-outputs.md
│ ├── factor-05-unify-execution-state.md
│ ├── factor-06-launch-pause-resume.md
│ ├── factor-07-contact-humans-with-tools.md
│ ├── factor-08-own-your-control-flow.md
│ ├── factor-09-compact-errors.md
│ ├── factor-1-natural-language-to-tool-calls.md
│ ├── factor-10-small-focused-agents.md
│ ├── factor-11-trigger-from-anywhere.md
│ ├── factor-12-stateless-reducer.md
│ ├── factor-2-own-your-prompts.md
│ ├── factor-3-own-your-context-window.md
│ ├── factor-4-tools-are-structured-outputs.md
│ ├── factor-5-unify-execution-state.md
│ ├── factor-6-launch-pause-resume.md
│ ├── factor-7-contact-humans-with-tools.md
│ ├── factor-8-own-your-control-flow.md
│ └── factor-9-compact-errors.md
├── drafts/
│ ├── a2h-spec.md
│ └── ah2-openapi.json
├── hack/
│ └── contributors_markdown/
│ ├── .python-version
│ ├── README.md
│ ├── contributors_markdown.py
│ └── pyproject.toml
├── packages/
│ ├── create-12-factor-agent/
│ │ └── template/
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── baml_src/
│ │ │ ├── agent.baml
│ │ │ ├── clients.baml
│ │ │ ├── generators.baml
│ │ │ └── tool_calculator.baml
│ │ ├── package.json
│ │ ├── src/
│ │ │ ├── a2h.ts
│ │ │ ├── agent.ts
│ │ │ ├── cli.ts
│ │ │ ├── index.ts
│ │ │ ├── server.ts
│ │ │ └── state.ts
│ │ └── tsconfig.json
│ └── walkthroughgen/
│ ├── .gitignore
│ ├── examples/
│ │ ├── typescript/
│ │ │ ├── .gitignore
│ │ │ ├── walkthrough/
│ │ │ │ ├── 00-package-lock.json
│ │ │ │ ├── 00-package.json
│ │ │ │ ├── 00-tsconfig.json
│ │ │ │ ├── 01-index.ts
│ │ │ │ ├── 02-cli.ts
│ │ │ │ └── 02-index.ts
│ │ │ └── walkthrough.yaml
│ │ └── walkthroughgen/
│ │ └── walkthrough.yaml
│ ├── jest.config.js
│ ├── package.json
│ ├── prompt.md
│ ├── readme.md
│ ├── src/
│ │ ├── cli.ts
│ │ └── index.ts
│ ├── test/
│ │ ├── e2e/
│ │ │ └── test-e2e.ts
│ │ └── utils/
│ │ ├── console-mock.ts
│ │ └── temp-dir.ts
│ └── tsconfig.json
└── workshops/
├── .gitignore
├── .python-version
├── 2025-05/
│ ├── .gitignore
│ ├── Makefile
│ ├── final/
│ │ ├── .gitignore
│ │ ├── baml_src/
│ │ │ ├── agent.baml
│ │ │ ├── clients.baml
│ │ │ ├── generators.baml
│ │ │ └── tool_calculator.baml
│ │ ├── package.json
│ │ ├── src/
│ │ │ ├── agent.ts
│ │ │ ├── cli.ts
│ │ │ ├── index.ts
│ │ │ ├── server.ts
│ │ │ └── state.ts
│ │ └── tsconfig.json
│ ├── sections/
│ │ ├── 00-hello-world/
│ │ │ ├── README.md
│ │ │ └── walkthrough/
│ │ │ ├── 00-.gitignore
│ │ │ ├── 00-index.ts
│ │ │ ├── 00-package.json
│ │ │ └── 00-tsconfig.json
│ │ ├── 01-cli-and-agent/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ └── index.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 01-agent.baml
│ │ │ ├── 01-agent.ts
│ │ │ ├── 01-cli.ts
│ │ │ └── 01-index.ts
│ │ ├── 02-calculator-tools/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ └── generators.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ └── index.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 02-agent.baml
│ │ │ └── 02-tool_calculator.baml
│ │ ├── 03-tool-loop/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ └── index.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 03-agent.ts
│ │ │ └── 03b-agent.ts
│ │ ├── 04-baml-tests/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ └── index.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 04-agent.baml
│ │ │ ├── 04b-agent.baml
│ │ │ └── 04c-agent.baml
│ │ ├── 05-human-tools/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ └── index.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 05-agent.baml
│ │ │ ├── 05-agent.ts
│ │ │ ├── 05-cli.ts
│ │ │ ├── 05b-agent.baml
│ │ │ └── 05c-agent.baml
│ │ ├── 06-customize-prompt/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ └── index.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ └── 06-agent.baml
│ │ ├── 07-context-window/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ └── index.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 07-agent.ts
│ │ │ ├── 07b-agent.ts
│ │ │ └── 07c-agent.baml
│ │ ├── 08-api-endpoints/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ └── index.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ └── 08-server.ts
│ │ ├── 09-state-management/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ ├── index.ts
│ │ │ │ └── server.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 09-server.ts
│ │ │ └── 09-state.ts
│ │ ├── 10-human-approval/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── server.ts
│ │ │ │ └── state.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 10-agent.ts
│ │ │ └── 10-server.ts
│ │ ├── 11-humanlayer-approval/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── server.ts
│ │ │ │ └── state.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 11-cli.ts
│ │ │ ├── 11b-cli.ts
│ │ │ └── 11c-cli.ts
│ │ ├── 12-humanlayer-webhook/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ ├── generators.baml
│ │ │ │ └── tool_calculator.baml
│ │ │ ├── package.json
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ ├── index.ts
│ │ │ │ ├── server.ts
│ │ │ │ └── state.ts
│ │ │ ├── tsconfig.json
│ │ │ └── walkthrough/
│ │ │ ├── 12-1-server-init.ts
│ │ │ └── 12a-server.ts
│ │ └── final/
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── baml_src/
│ │ │ ├── agent.baml
│ │ │ ├── clients.baml
│ │ │ ├── generators.baml
│ │ │ └── tool_calculator.baml
│ │ ├── package.json
│ │ ├── src/
│ │ │ ├── agent.ts
│ │ │ ├── cli.ts
│ │ │ ├── index.ts
│ │ │ ├── server.ts
│ │ │ └── state.ts
│ │ └── tsconfig.json
│ ├── walkthrough/
│ │ ├── 00-.gitignore
│ │ ├── 00-index.ts
│ │ ├── 00-package.json
│ │ ├── 00-tsconfig.json
│ │ ├── 01-agent.baml
│ │ ├── 01-agent.ts
│ │ ├── 01-cli.ts
│ │ ├── 01-index.ts
│ │ ├── 02-agent.baml
│ │ ├── 02-tool_calculator.baml
│ │ ├── 03-agent.ts
│ │ ├── 03b-agent.ts
│ │ ├── 04-agent.baml
│ │ ├── 04b-agent.baml
│ │ ├── 04c-agent.baml
│ │ ├── 05-agent.baml
│ │ ├── 05-agent.ts
│ │ ├── 05-cli.ts
│ │ ├── 05b-agent.baml
│ │ ├── 05c-agent.baml
│ │ ├── 06-agent.baml
│ │ ├── 07-agent.ts
│ │ ├── 07b-agent.ts
│ │ ├── 07c-agent.baml
│ │ ├── 08-server.ts
│ │ ├── 09-server.ts
│ │ ├── 09-state.ts
│ │ ├── 10-agent.ts
│ │ ├── 10-server.ts
│ │ ├── 11-cli.ts
│ │ ├── 11b-cli.ts
│ │ ├── 11c-cli.ts
│ │ ├── 12-1-server-init.ts
│ │ ├── 12-server.ts
│ │ ├── 12a-server.ts
│ │ ├── 12aa-server.ts
│ │ └── 12b-server.ts
│ ├── walkthrough.md
│ └── walkthrough.yaml
├── 2025-05-17/
│ ├── .gitignore
│ ├── sections/
│ │ ├── 00-hello-world/
│ │ │ ├── README.md
│ │ │ └── walkthrough/
│ │ │ ├── 00-.gitignore
│ │ │ ├── 00-index.ts
│ │ │ ├── 00-package.json
│ │ │ └── 00-tsconfig.json
│ │ ├── 01-cli-and-agent/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── src/
│ │ │ │ └── index.ts
│ │ │ └── walkthrough/
│ │ │ ├── 01-agent.baml
│ │ │ ├── 01-agent.ts
│ │ │ ├── 01-cli.ts
│ │ │ └── 01-index.ts
│ │ ├── 02-calculator-tools/
│ │ │ ├── .gitignore
│ │ │ ├── README.md
│ │ │ ├── baml_src/
│ │ │ │ ├── agent.baml
│ │ │ │ ├── clients.baml
│ │ │ │ └── generators.baml
│ │ │ ├── src/
│ │ │ │ ├── agent.ts
│ │ │ │ ├── cli.ts
│ │ │ │ └── index.ts
│ │ │ └── walkthrough/
│ │ │ ├── 02-agent.baml
│ │ │ └── 02-tool_calculator.baml
│ │ └── 03-tool-loop/
│ │ ├── .gitignore
│ │ ├── README.md
│ │ ├── baml_src/
│ │ │ ├── agent.baml
│ │ │ ├── clients.baml
│ │ │ ├── generators.baml
│ │ │ └── tool_calculator.baml
│ │ ├── src/
│ │ │ ├── agent.ts
│ │ │ ├── cli.ts
│ │ │ └── index.ts
│ │ └── walkthrough/
│ │ ├── 03-agent.ts
│ │ └── 03b-agent.ts
│ ├── walkthrough/
│ │ ├── 00-.gitignore
│ │ ├── 00-index.ts
│ │ ├── 00-package.json
│ │ ├── 00-tsconfig.json
│ │ ├── 01-agent.baml
│ │ ├── 01-agent.ts
│ │ ├── 01-cli.ts
│ │ ├── 01-index.ts
│ │ ├── 02-agent.baml
│ │ ├── 02-tool_calculator.baml
│ │ ├── 03-agent.ts
│ │ ├── 03b-agent.ts
│ │ ├── 04-agent.baml
│ │ ├── 04b-agent.baml
│ │ ├── 04c-agent.baml
│ │ ├── 05-agent.baml
│ │ ├── 05-agent.ts
│ │ ├── 05-cli.ts
│ │ ├── 05b-agent.baml
│ │ ├── 05c-agent.baml
│ │ ├── 06-agent.baml
│ │ ├── 07-agent.ts
│ │ ├── 07b-agent.ts
│ │ ├── 07c-agent.baml
│ │ ├── 08-server.ts
│ │ ├── 09-server.ts
│ │ ├── 09-state.ts
│ │ ├── 10-agent.ts
│ │ ├── 10-server.ts
│ │ ├── 11-cli.ts
│ │ ├── 11b-cli.ts
│ │ ├── 11c-cli.ts
│ │ ├── 12-1-server-init.ts
│ │ ├── 12-server.ts
│ │ ├── 12a-server.ts
│ │ ├── 12aa-server.ts
│ │ └── 12b-server.ts
│ ├── walkthrough.md
│ └── walkthrough.yaml
└── 2025-07-16/
├── .gitignore
├── CLAUDE.md
├── hack/
│ ├── analyze_log_capture.py
│ ├── inspect_notebook.py
│ ├── minimal_test.ipynb
│ ├── test_log_capture.sh
│ └── testing.md
├── pyproject.toml
├── test_notebook_colab_sim.sh
├── walkthrough/
│ ├── 00-.gitignore
│ ├── 00-main.py
│ ├── 00-package.json
│ ├── 00-tsconfig.json
│ ├── 01-agent.baml
│ ├── 01-agent.py
│ ├── 01-main.py
│ ├── 02-agent.baml
│ ├── 02-main.py
│ ├── 02-tool_calculator.baml
│ ├── 03-agent.py
│ ├── 03-main.py
│ ├── 03b-agent.py
│ ├── 03b-agent.ts
│ ├── 04-agent.baml
│ ├── 04b-agent.baml
│ ├── 04c-agent.baml
│ ├── 05-agent.baml
│ ├── 05-agent.py
│ ├── 05-main.py
│ ├── 05b-agent.baml
│ ├── 05c-agent.baml
│ ├── 06-agent.baml
│ ├── 07-agent.py
│ ├── 07-main.py
│ ├── 07b-agent.ts
│ ├── 07c-agent.baml
│ ├── 08-server.ts
│ ├── 09-server.ts
│ ├── 09-state.ts
│ ├── 10-agent.ts
│ ├── 10-server.ts
│ ├── 11-cli.ts
│ ├── 11b-cli.ts
│ ├── 11c-cli.ts
│ ├── 12-1-server-init.ts
│ ├── 12-server.ts
│ ├── 12a-server.ts
│ ├── 12aa-server.ts
│ └── 12b-server.ts
├── walkthrough.yaml
├── walkthrough_python_enhanced.yaml
├── walkthroughgen_py.py
└── workshop_final.ipynb
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
.promptx
================================================
FILE: CLAUDE.md
================================================
# AI Assistant Instructions
**IMPORTANT: Copy or merge this file into your project's CLAUDE.md file to activate agent personas.**
## 🚨 MANDATORY PERSONA SELECTION
**CRITICAL: You MUST adopt one of the specialized personas before proceeding with any work.**
**BEFORE DOING ANYTHING ELSE**, you must read and adopt one of these personas:
1. **Developer Agent** - Read `.promptx/personas/agent-developer.md` - For coding, debugging, and implementation tasks
2. **Code Reviewer Agent** - Read `.promptx/personas/agent-code-reviewer.md` - For reviewing code changes and quality assurance
3. **Rebaser Agent** - Read `.promptx/personas/agent-rebaser.md` - For cleaning git history and rebasing changes
4. **Merger Agent** - Read `.promptx/personas/agent-merger.md` - For merging code across branches
5. **Multiplan Manager Agent** - Read `.promptx/personas/agent-multiplan-manager.md` - For orchestrating parallel work and creating plans
**DO NOT PROCEED WITHOUT SELECTING A PERSONA.** Each persona has specific rules, workflows, and tools that you MUST follow exactly.
## How to Choose Your Persona
- **Asked to write code, fix bugs, or implement features?** → Use Developer Agent
- **Asked to review code changes?** → Use Code Reviewer Agent
- **Asked to clean git history or rebase changes?** → Use Rebaser Agent
- **Asked to merge branches or consolidate work?** → Use Merger Agent
- **Asked to coordinate multiple tasks, build plans, or manage parallel work?** → Use Multiplan Manager Agent
## Project Context
[CUSTOMIZE THIS SECTION FOR YOUR PROJECT]
This project uses:
- **Language/Framework**: [Add your stack here]
- **Build Tool**: [Add your build commands]
- **Testing**: [Add your test commands]
- **Architecture**: [Describe your project structure]
## Core Principles (All Personas)
1. **READ FIRST**: Always read at least 1500 lines to understand context fully
2. **DELETE MORE THAN YOU ADD**: Complexity compounds into disasters
3. **FOLLOW EXISTING PATTERNS**: Don't invent new approaches
4. **BUILD AND TEST**: Run your build and test commands after changes
5. **COMMIT FREQUENTLY**: Every 5-10 minutes for meaningful progress
## File Structure Reference
[CUSTOMIZE THIS SECTION FOR YOUR PROJECT]
```
./
├── package.json # [or your dependency file]
├── src/ # [your source directory]
│ ├── [your modules]
│ └── [your files]
├── test/ # [your test directory]
├── .promptx/ # Agent personas (created by promptx init)
│ └── personas/
└── CLAUDE.md # This file (after merging)
```
## Common Commands (All Personas)
[CUSTOMIZE THIS SECTION FOR YOUR PROJECT]
```bash
# Build project
[your build command]
# Run tests
[your test command]
# Lint code
[your lint command]
# Deploy locally
[your deploy command]
```
## CRITICAL REMINDER
**You CANNOT proceed without adopting a persona.** Each persona has:
- Specific workflows and rules
- Required tools and commands
- Success criteria and verification steps
- Commit and progress requirements
**Choose your persona now and follow its instructions exactly.**
---
*Generated by promptx - Agent personas are in .promptx/personas/*
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of control, an entity
is assumed to be under the control of another entity if the entity
that controls the other entity owns directly or indirectly fifty
percent (50%) or more of the outstanding shares, or if there is some
other contractual arrangement whereby the first entity effectively
controls the management decisions of the other entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(which shall not include Communication that is clearly marked or
otherwise designated in writing by the copyright owner as "Not a Contribution").
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based upon (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and separate works based upon the Work.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control
systems, and issue tracking systems that are managed by, or on behalf
of, the Licensor for the purpose of discussing and improving the Work,
but excluding communication that is clearly marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to use, reproduce, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Work, and to
permit persons to whom the Work is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Work.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, trademark, patent,
attribution and other notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright notice to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Support. You can choose to offer,
and charge a fee for, warranty, support, indemnity or other
liability obligations and/or rights consistent with this License.
However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or support.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in comments for
the particular file format. (We recommend that you include a
file named COPYING in your distribution, which contains the
complete text of the license.)
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: Makefile
================================================
# Makefile for launch compatibility
#
# Provides two convenience targets:
#   setup    - install the project's JavaScript dependencies
#   teardown - remove the installed dependencies
#
# NOTE(review): recipe lines must start with a tab character for make to
# accept them; confirm the indentation is intact in the checked-in file.

# Both targets are commands, not files, so mark them phony so that make
# always runs them even if a file named "setup" or "teardown" exists.
.PHONY: setup teardown
# Install dependencies. The leading "@" keeps make from echoing each command.
setup:
@echo "Setting up project..."
# Try npm first; if it exits non-zero, fall back to bun, then to yarn.
@npm install || bun install || yarn install
@echo "Setup complete!"
# Remove installed dependencies by deleting the node_modules directory.
teardown:
@echo "Tearing down project..."
@rm -rf node_modules
@echo "Teardown complete!"
================================================
FILE: README.md
================================================
# 12-Factor Agents - Principles for building reliable LLM applications
*In the spirit of [12 Factor Apps](https://12factor.net/)*. *The source for this project is public at https://github.com/humanlayer/12-factor-agents, and I welcome your feedback and contributions. Let's figure this out together!*
> [!TIP]
> Missed the AI Engineer World's Fair? [Catch the talk here](https://www.youtube.com/watch?v=8kMaTybvDUw)
>
> Looking for Context Engineering? [Jump straight to factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
>
> Want to contribute to `npx/uvx create-12-factor-agent` - check out [the discussion thread](https://github.com/humanlayer/12-factor-agents/discussions/61)
Hi, I'm Dex. I've been [hacking](https://youtu.be/8bIHcttkOTE) on [AI agents](https://theouterloop.substack.com) for [a while](https://humanlayer.dev).
**I've tried every agent framework out there**, from the plug-and-play crew/langchains to the "minimalist" smolagents of the world to the "production grade" langgraph, griptape, etc.
**I've talked to a lot of really strong founders**, in and out of YC, who are all building really impressive things with AI. Most of them are rolling the stack themselves. I don't see a lot of frameworks in production customer-facing agents.
**I've been surprised to find** that most of the products out there billing themselves as "AI Agents" are not all that agentic. A lot of them are mostly deterministic code, with LLM steps sprinkled in at just the right points to make the experience truly magical.
Agents, at least the good ones, don't follow the ["here's your prompt, here's a bag of tools, loop until you hit the goal"](https://www.anthropic.com/engineering/building-effective-agents#agents) pattern. Rather, they are comprised of mostly just software.
So, I set out to answer:
> ### **What are the principles we can use to build LLM-powered software that is actually good enough to put in the hands of production customers?**
Welcome to 12-factor agents. As every Chicago mayor since Daley has consistently plastered all over the city's major airports, we're glad you're here.
*Special thanks to [@iantbutler01](https://github.com/iantbutler01), [@tnm](https://github.com/tnm), [@hellovai](https://www.github.com/hellovai), [@stantonk](https://www.github.com/stantonk), [@balanceiskey](https://www.github.com/balanceiskey), [@AdjectiveAllison](https://www.github.com/AdjectiveAllison), [@pfbyjy](https://www.github.com/pfbyjy), [@a-churchill](https://www.github.com/a-churchill), and the SF MLOps community for early feedback on this guide.*
## The Short Version: The 12 Factors
Even if LLMs [continue to get exponentially more powerful](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md#what-if-llms-get-smarter), there will be core engineering techniques that make LLM-powered software more reliable, more scalable, and easier to maintain.
- [How We Got Here: A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md)
- [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)
- [Factor 2: Own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)
- [Factor 3: Own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
- [Factor 4: Tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)
- [Factor 5: Unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)
- [Factor 6: Launch/Pause/Resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md)
- [Factor 7: Contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)
- [Factor 8: Own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md)
- [Factor 9: Compact Errors into Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md)
- [Factor 10: Small, Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md)
- [Factor 11: Trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md)
- [Factor 12: Make your agent a stateless reducer](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md)
### Visual Nav
| | | |
|----|----|-----|
|[](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) | [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) | [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) |
|[](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) | [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) | [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) |
| [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) | [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) | [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md) |
| [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md) | [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) | [](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md) |
## How we got here
For a deeper dive on my agent journey and what led us here, check out [A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) - a quick summary here:
### Software is a directed graph
We're gonna talk a lot about Directed Graphs (DGs) and their Acyclic friends, DAGs. I'll start by pointing out that...well...software is a directed graph. There's a reason we used to represent programs as flow charts.

### From code to DAGs
Around 20 years ago, we started to see DAG orchestrators become popular. We're talking classics like [Airflow](https://airflow.apache.org/), [Prefect](https://www.prefect.io/), some predecessors, and some newer ones like [dagster](https://dagster.io/), [inngest](https://www.inngest.com/), and [windmill](https://www.windmill.dev/). These followed the same graph pattern, with the added benefit of observability, modularity, retries, administration, etc.

### The promise of agents
I'm not the first [person to say this](https://youtu.be/Dc99-zTMyMg?si=bcT0hIwWij2mR-40&t=73), but my biggest takeaway when I started learning about agents, was that you get to throw the DAG away. Instead of software engineers coding each step and edge case, you can give the agent a goal and a set of transitions:

And let the LLM make decisions in real time to figure out the path

The promise here is that you write less software, you just give the LLM the "edges" of the graph and let it figure out the nodes. You can recover from errors, you can write less code, and you may find that LLMs find novel solutions to problems.
### Agents as loops
As we'll see later, it turns out this doesn't quite work.
Let's dive one step deeper - with agents you've got this loop, which consists of 3 steps plus a termination check:
1. LLM determines the next step in the workflow, outputting structured json ("tool calling")
2. Deterministic code executes the tool call
3. The result is appended to the context window
4. Repeat until the next step is determined to be "done"
```python
initial_event = {"message": "..."}
context = [initial_event]
while True:
next_step = await llm.determine_next_step(context)
context.append(next_step)
if (next_step.intent == "done"):
return next_step.final_answer
result = await execute_step(next_step)
context.append(result)
```
Our initial context is just the starting event (maybe a user message, maybe a cron fired, maybe a webhook, etc), and we ask the llm to choose the next step (tool) or to determine that we're done.
Here's a multi-step example:
[](https://github.com/user-attachments/assets/3beb0966-fdb1-4c12-a47f-ed4e8240f8fd)
GIF Version

## Why 12-factor agents?
At the end of the day, this approach just doesn't work as well as we want it to.
In building HumanLayer, I've talked to at least 100 SaaS builders (mostly technical founders) looking to make their existing product more agentic. The journey usually goes something like:
1. Decide you want to build an agent
2. Product design, UX mapping, what problems to solve
3. Want to move fast, so grab $FRAMEWORK and *get to building*
4. Get to 70-80% quality bar
5. Realize that 80% isn't good enough for most customer-facing features
6. Realize that getting past 80% requires reverse-engineering the framework, prompts, flow, etc.
7. Start over from scratch
Random Disclaimers
**DISCLAIMER**: I'm not sure the exact right place to say this, but here seems as good as any: **this is BY NO MEANS meant to be a dig on either the many frameworks out there, or the pretty dang smart people who work on them**. They enable incredible things and have accelerated the AI ecosystem.
I hope that one outcome of this post is that agent framework builders can learn from the journeys of myself and others, and make frameworks even better.
Especially for builders who want to move fast but need deep control.
**DISCLAIMER 2**: I'm not going to talk about MCP. I'm sure you can see where it fits in.
**DISCLAIMER 3**: I'm using mostly typescript, for [reasons](https://www.linkedin.com/posts/dexterihorthy_llms-typescript-aiagents-activity-7290858296679313408-Lh9e?utm_source=share&utm_medium=member_desktop&rcm=ACoAAA4oHTkByAiD-wZjnGsMBUL_JT6nyyhOh30) but all this stuff works in python or any other language you prefer.
Anyways back to the thing...
### Design Patterns for great LLM applications
After digging through hundreds of AI libraries and working with dozens of founders, my instinct is this:
1. There are some core things that make agents great
2. Going all in on a framework and building what is essentially a greenfield rewrite may be counter-productive
3. There are some core principles that make agents great, and you will get most/all of them if you pull in a framework
4. BUT, the fastest way I've seen for builders to get high-quality AI software in the hands of customers is to take small, modular concepts from agent building, and incorporate them into their existing product
5. These modular concepts from agents can be defined and applied by most skilled software engineers, even if they don't have an AI background
> #### The fastest way I've seen for builders to get good AI software in the hands of customers is to take small, modular concepts from agent building, and incorporate them into their existing product
## The 12 Factors (again)
- [How We Got Here: A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md)
- [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)
- [Factor 2: Own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)
- [Factor 3: Own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
- [Factor 4: Tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)
- [Factor 5: Unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)
- [Factor 6: Launch/Pause/Resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md)
- [Factor 7: Contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)
- [Factor 8: Own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md)
- [Factor 9: Compact Errors into Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md)
- [Factor 10: Small, Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md)
- [Factor 11: Trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md)
- [Factor 12: Make your agent a stateless reducer](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md)
## Honorable Mentions / other advice
- [Factor 13: Pre-fetch all the context you might need](https://github.com/humanlayer/12-factor-agents/blob/main/content/appendix-13-pre-fetch.md)
## Related Resources
- Contribute to this guide [here](https://github.com/humanlayer/12-factor-agents)
- [I talked about a lot of this on an episode of the Tool Use podcast](https://youtu.be/8bIHcttkOTE) in March 2025
- I write about some of this stuff at [The Outer Loop](https://theouterloop.substack.com)
- I do [webinars about Maximizing LLM Performance](https://github.com/hellovai/ai-that-works/tree/main) with [@hellovai](https://github.com/hellovai)
- We build OSS agents with this methodology under [got-agents/agents](https://github.com/got-agents/agents)
- We ignored all our own advice and built a [framework for running distributed agents in kubernetes](https://github.com/humanlayer/kubechain)
- Other links from this guide:
- [12 Factor Apps](https://12factor.net)
- [Building Effective Agents (Anthropic)](https://www.anthropic.com/engineering/building-effective-agents#agents)
- [Prompts are Functions](https://thedataexchange.media/baml-revolution-in-ai-engineering/ )
- [Library patterns: Why frameworks are evil](https://tomasp.net/blog/2015/library-frameworks/)
- [The Wrong Abstraction](https://sandimetz.com/blog/2016/1/20/the-wrong-abstraction)
- [Mailcrew Agent](https://github.com/dexhorthy/mailcrew)
- [Mailcrew Demo Video](https://www.youtube.com/watch?v=f_cKnoPC_Oo)
- [Chainlit Demo](https://x.com/chainlit_io/status/1858613325921480922)
- [TypeScript for LLMs](https://www.linkedin.com/posts/dexterihorthy_llms-typescript-aiagents-activity-7290858296679313408-Lh9e)
- [Schema Aligned Parsing](https://www.boundaryml.com/blog/schema-aligned-parsing)
- [Function Calling vs Structured Outputs vs JSON Mode](https://www.vellum.ai/blog/when-should-i-use-function-calling-structured-outputs-or-json-mode)
- [BAML on GitHub](https://github.com/boundaryml/baml)
- [OpenAI JSON vs Function Calling](https://docs.llamaindex.ai/en/stable/examples/llm/openai_json_vs_function_calling/)
- [Outer Loop Agents](https://theouterloop.substack.com/p/openais-realtime-api-is-a-step-towards)
- [Airflow](https://airflow.apache.org/)
- [Prefect](https://www.prefect.io/)
- [Dagster](https://dagster.io/)
- [Inngest](https://www.inngest.com/)
- [Windmill](https://www.windmill.dev/)
- [The AI Agent Index (MIT)](https://aiagentindex.mit.edu/)
- [NotebookLM on Finding Model Capability Boundaries](https://open.substack.com/pub/swyx/p/notebooklm?selection=08e1187c-cfee-4c63-93c9-71216640a5f8)
## Contributors
Thanks to everyone who has contributed to 12-factor agents!
[
](https://github.com/dexhorthy) [
](https://github.com/Sypherd) [
](https://github.com/tofaramususa) [
](https://github.com/a-churchill) [
](https://github.com/Elijas) [
](https://github.com/hugolmn) [
](https://github.com/jeremypeters)
[
](https://github.com/kndl) [
](https://github.com/maciejkos) [
](https://github.com/pfbyjy) [
](https://github.com/0xRaduan) [
](https://github.com/zyuanlim) [
](https://github.com/lombardo-chcg) [
](https://github.com/sahanatvessel)
## License
All content and images are licensed under a CC BY-SA 4.0 License
Code is licensed under the Apache 2.0 License
================================================
FILE: content/appendix-13-pre-fetch.md
================================================
### Factor 13 - pre-fetch all the context you might need
If there's a high chance that your model will call tool X, don't waste token round trips telling the model to fetch it. That is, instead of a pseudo-prompt like:
```jinja
When looking at deployments, you will likely want to fetch the list of published git tags,
so you can use it to deploy to prod.
Here's what happened so far:
{{ thread.events }}
What's the next step?
Answer in JSON format with one of the following intents:
{
intent: 'deploy_backend_to_prod',
tag: string
} OR {
intent: 'list_git_tags'
} OR {
intent: 'done_for_now',
message: string
}
```
and your code looks like
```python
thread = {"events": [initial_message]}
next_step = await determine_next_step(thread)
while True:
switch next_step.intent:
case 'list_git_tags':
tags = await fetch_git_tags()
thread["events"].append({
type: 'list_git_tags',
data: tags,
})
case 'deploy_backend_to_prod':
deploy_result = await deploy_backend_to_prod(next_step.data.tag)
thread["events"].append({
"type": 'deploy_backend_to_prod',
"data": deploy_result,
})
case 'done_for_now':
await notify_human(next_step.message)
break
# ...
```
You might as well just fetch the tags and include them in the context window, like:
```diff
- When looking at deployments, you will likely want to fetch the list of published git tags,
- so you can use it to deploy to prod.
+ The current git tags are:
+ {{ git_tags }}
Here's what happened so far:
{{ thread.events }}
What's the next step?
Answer in JSON format with one of the following intents:
{
intent: 'deploy_backend_to_prod',
tag: string
- } OR {
- intent: 'list_git_tags'
} OR {
intent: 'done_for_now',
message: string
}
```
and your code looks like
```diff
thread = {"events": [initial_message]}
+ git_tags = await fetch_git_tags()
- next_step = await determine_next_step(thread)
+ next_step = await determine_next_step(thread, git_tags)
while True:
switch next_step.intent:
- case 'list_git_tags':
- tags = await fetch_git_tags()
- thread["events"].append({
- type: 'list_git_tags',
- data: tags,
- })
case 'deploy_backend_to_prod':
deploy_result = await deploy_backend_to_prod(next_step.data.tag)
thread["events"].append({
"type": 'deploy_backend_to_prod',
"data": deploy_result,
})
case 'done_for_now':
await notify_human(next_step.message)
break
# ...
```
or even just include the tags in the thread and remove the specific parameter from your prompt template:
```diff
thread = {"events": [initial_message]}
+ # add the request
+ thread["events"].append({
+ "type": 'list_git_tags',
+ })
git_tags = await fetch_git_tags()
+ # add the result
+ thread["events"].append({
+ "type": 'list_git_tags_result',
+ "data": git_tags,
+ })
- next_step = await determine_next_step(thread, git_tags)
+ next_step = await determine_next_step(thread)
while True:
switch next_step.intent:
case 'deploy_backend_to_prod':
deploy_result = await deploy_backend_to_prod(next_step.data.tag)
thread["events"].append(deploy_result)
case 'done_for_now':
await notify_human(next_step.message)
break
# ...
```
Overall:
> #### If you already know what tools you'll want the model to call, just call them DETERMINISTICALLY and let the model do the hard part of figuring out how to use their outputs
Again, AI engineering is all about [Context Engineering](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md).
[← Stateless Reducer](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md) | [Further Reading →](https://github.com/humanlayer/12-factor-agents/blob/main/README.md#related-resources)
================================================
FILE: content/brief-history-of-software.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
## The longer version: how we got here
### You don't have to listen to me
Whether you're new to agents or an ornery old veteran like me, I'm going to try to convince you to throw out most of what you think about AI Agents, take a step back, and rethink them from first principles. (spoiler alert if you didn't catch the OpenAI responses launch a few weeks back, but pushing MORE agent logic behind an API ain't it)
## Agents are software, and a brief history thereof
Let's talk about how we got here.
### 60 years ago
We're gonna talk a lot about Directed Graphs (DGs) and their Acyclic friends, DAGs. I'll start by pointing out that...well...software is a directed graph. There's a reason we used to represent programs as flow charts.

### 20 years ago
Around 20 years ago, we started to see DAG orchestrators become popular. We're talking classics like [Airflow](https://airflow.apache.org/), [Prefect](https://www.prefect.io/), some predecessors, and some newer ones like [dagster](https://dagster.io/), [inngest](https://www.inngest.com/), and [windmill](https://www.windmill.dev/). These followed the same graph pattern, with the added benefit of observability, modularity, retries, administration, etc.

### 10-15 years ago
When ML models started to get good enough to be useful, we started to see DAGs with ML models sprinkled in. You might imagine steps like "summarize the text in this column into a new column" or "classify the support issues by severity or sentiment".

But at the end of the day, it's still mostly the same good old deterministic software.
### The promise of agents
I'm not the first [person to say this](https://youtu.be/Dc99-zTMyMg?si=bcT0hIwWij2mR-40&t=73), but my biggest takeaway when I started learning about agents, was that you get to throw the DAG away. Instead of software engineers coding each step and edge case, you can give the agent a goal and a set of transitions:

And let the LLM make decisions in real time to figure out the path

The promise here is that you write less software, you just give the LLM the "edges" of the graph and let it figure out the nodes. You can recover from errors, you can write less code, and you may find that LLMs find novel solutions to problems.
### Agents as loops
Put another way, you've got this loop, which consists of 3 steps plus a termination check:
1. LLM determines the next step in the workflow, outputting structured json ("tool calling")
2. Deterministic code executes the tool call
3. The result is appended to the context window
4. Repeat until the next step is determined to be "done"
```python
initial_event = {"message": "..."}
context = [initial_event]
while True:
next_step = await llm.determine_next_step(context)
context.append(next_step)
if (next_step.intent == "done"):
return next_step.final_answer
result = await execute_step(next_step)
context.append(result)
```
Our initial context is just the starting event (maybe a user message, maybe a cron fired, maybe a webhook, etc),
and we ask the llm to choose the next step (tool) or to determine that we're done.
Here's a multi-step example:
[](https://github.com/user-attachments/assets/3beb0966-fdb1-4c12-a47f-ed4e8240f8fd)
GIF Version

And the "materialized" DAG that was generated would look something like:

### The problem with this "loop until you solve it" pattern
The biggest problems with this pattern:
- Agents get lost when the context window gets too long - they spin out trying the same broken approach over and over again
- literally that's it, but that's enough to kneecap the approach
Even if you haven't hand-rolled an agent, you've probably seen this long-context problem in working with agentic coding tools. They just get lost after a while and you need to start a new chat.
I'll even perhaps posit something I've heard in passing quite a bit, and that YOU probably have developed your own intuition around:
> ### **Even as models support longer and longer context windows, you'll ALWAYS get better results with a small, focused prompt and context**
Most builders I've talked to **pushed the "tool calling loop" idea to the side** when they realized that anything more than 10-20 turns becomes a big mess that the LLM can't recover from. Even if the agent gets it right 90% of the time, that's miles away from "good enough to put in customer hands". Can you imagine a web app that crashed on 10% of page loads?
**Update 2025-06-09** - I really like how [@swyx](https://x.com/swyx/status/1932125643384455237) put this:
### What actually works - micro agents
One thing that I **have** seen in the wild quite a bit is taking the agent pattern and sprinkling it into a broader more deterministic DAG.

You might be asking - "why use agents at all in this case?" - we'll get into that shortly, but basically, having language models managing well-scoped sets of tasks makes it easy to incorporate live human feedback, translating it into workflow steps without spinning out into context error loops. ([factor 1](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md), [factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md), [factor 7](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)).
> #### having language models managing well-scoped sets of tasks makes it easy to incorporate live human feedback...without spinning out into context error loops
### A real life micro agent
Here's an example of how deterministic code might run one micro agent responsible for handling the human-in-the-loop steps for deployment.

* **Human** Merges PR to GitHub main branch
* **Deterministic Code** Deploys to staging env
* **Deterministic Code** Runs end-to-end (e2e) tests against staging
* **Deterministic Code** Hands to agent for prod deployment, with initial context: "deploy SHA 4af9ec0 to production"
* **Agent** calls `deploy_frontend_to_prod(4af9ec0)`
* **Deterministic code** requests human approval on this action
* **Human** Rejects the action with feedback "can you deploy the backend first?"
* **Agent** calls `deploy_backend_to_prod(4af9ec0)`
* **Deterministic code** requests human approval on this action
* **Human** approves the action
* **Deterministic code** executes the backend deployment
* **Agent** calls `deploy_frontend_to_prod(4af9ec0)`
* **Deterministic code** requests human approval on this action
* **Human** approves the action
* **Deterministic code** executes the frontend deployment
* **Agent** determines that the task was completed successfully, we're done!
* **Deterministic code** runs the end-to-end tests against production
* **Deterministic code** task completed, OR pass to rollback agent to review failures and potentially roll back
[](https://github.com/user-attachments/assets/deb356e9-0198-45c2-9767-231cb569ae13)
GIF Version

This example is based on a real life [OSS agent we've shipped to manage our deployments at HumanLayer](https://github.com/got-agents/agents/tree/main/deploybot-ts) - here is a real conversation I had with it last week:

We haven't given this agent a huge pile of tools or tasks. The primary value in the LLM is parsing the human's plaintext feedback and proposing an updated course of action. We isolate tasks and contexts as much as possible to keep the LLM focused on a small, 5-10 step workflow.
Here's another [more classic support / chatbot demo](https://x.com/chainlit_io/status/1858613325921480922).
### So what's an agent really?
- **prompt** - tell an LLM how to behave, and what "tools" it has available. The output of the prompt is a JSON object that describes the next step in the workflow (the "tool call" or "function call"). ([factor 2](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md))
- **switch statement** - based on the JSON that the LLM returns, decide what to do with it. (part of [factor 8](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md))
- **accumulated context** - store the list of steps that have happened and their results ([factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md))
- **for loop** - until the LLM emits some sort of "Terminal" tool call (or plaintext response), add the result of the switch statement to the context window and ask the LLM to choose the next step. ([factor 8](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md))

In the "deploybot" example, we gain a couple benefits from owning the control flow and context accumulation:
- In our **switch statement** and **for loop**, we can hijack control flow to pause for human input or to wait for completion of long-running tasks
- We can trivially serialize the **context** window for pause+resume
- In our **prompt**, we can optimize the heck out of how we pass instructions and "what happened so far" to the LLM
[Part II](https://github.com/humanlayer/12-factor-agents/blob/main/README.md#12-factor-agents) will **formalize these patterns** so they can be applied to add impressive AI features to any software project, without needing to go all in on conventional implementations/definitions of "AI agent".
[Factor 1 - Natural Language to Tool Calls →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md)
================================================
FILE: content/factor-01-natural-language-to-tool-calls.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 1. Natural Language to Tool Calls
One of the most common patterns in agent building is to convert natural language to structured tool calls. This is a powerful pattern that allows you to build agents that can reason about tasks and execute them.

This pattern, when applied atomically, is the simple translation of a phrase like
> can you create a payment link for $750 to Terri for sponsoring the february AI tinkerers meetup?
to a structured object that describes a Stripe API call like
```json
{
"function": {
"name": "create_payment_link",
"parameters": {
"amount": 750,
"customer": "cust_128934ddasf9",
"product": "prod_8675309",
"price": "prc_09874329fds",
"quantity": 1,
"memo": "Hey Terri - see below for the payment link for the february ai tinkerers meetup"
}
}
}
```
**Note**: in reality the stripe API is a bit more complex, a [real agent that does this](https://github.com/dexhorthy/mailcrew) ([video](https://www.youtube.com/watch?v=f_cKnoPC_Oo)) would list customers, list products, list prices, etc to build this payload with the proper ids, or include those ids in the prompt/context window (we'll see below how those are kinda the same thing though!)
From there, deterministic code can pick up the payload and do something with it. (More on this in [factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md))
```python
# The LLM takes natural language and returns a structured object
nextStep = await llm.determineNextStep(
"""
create a payment link for $750 to Terri
for sponsoring the february AI tinkerers meetup
"""
)
# Handle the structured output based on its function
if nextStep.function == 'create_payment_link':
stripe.paymentlinks.create(nextStep.parameters)
return # or whatever you want, see below
elif nextStep.function == 'something_else':
# ... more cases
pass
else: # the model didn't call a tool we know about
# do something else
pass
```
**NOTE**: While a full agent would then receive the API call result and loop with it, eventually returning something like
> I've successfully created a payment link for $750 to Terri for sponsoring the february AI tinkerers meetup. Here's the link: https://buy.stripe.com/test_1234567890
**Instead**, we're actually going to skip that step here, and save it for another factor, which you may or may not want to also incorporate (up to you!)
[← How We Got Here](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) | [Own Your Prompts →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)
================================================
FILE: content/factor-02-own-your-prompts.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 2. Own your prompts
Don't outsource your prompt engineering to a framework.

By the way, [this is far from novel advice:](https://hamel.dev/blog/posts/prompt/)

Some frameworks provide a "black box" approach like this:
```python
agent = Agent(
role="...",
goal="...",
personality="...",
tools=[tool1, tool2, tool3]
)
task = Task(
instructions="...",
expected_output=OutputModel
)
result = agent.run(task)
```
This is great for pulling in some TOP NOTCH prompt engineering to get you started, but it is often difficult to tune and/or reverse engineer to get exactly the right tokens into your model.
Instead, own your prompts and treat them as first-class code:
```rust
function DetermineNextStep(thread: string) -> DoneForNow | ListGitTags | DeployBackend | DeployFrontend | RequestMoreInformation {
prompt #"
{{ _.role("system") }}
You are a helpful assistant that manages deployments for frontend and backend systems.
You work diligently to ensure safe and successful deployments by following best practices
and proper deployment procedures.
Before deploying any system, you should check:
- The deployment environment (staging vs production)
- The correct tag/version to deploy
- The current system status
You can use tools like deploy_backend, deploy_frontend, and check_deployment_status
to manage deployments. For sensitive deployments, use request_approval to get
human verification.
Always think about what to do first, like:
- Check current deployment status
- Verify the deployment tag exists
- Request approval if needed
- Deploy to staging before production
- Monitor deployment progress
{{ _.role("user") }}
{{ thread }}
What should the next step be?
"#
}
```
(the above example uses [BAML](https://github.com/boundaryml/baml) to generate the prompt, but you can do this with any prompt engineering tool you want, or even just template it manually)
If the signature looks a little funny, we'll get to that in [factor 4 - tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)
```typescript
function DetermineNextStep(thread: string) -> DoneForNow | ListGitTags | DeployBackend | DeployFrontend | RequestMoreInformation {
```
Key benefits of owning your prompts:
1. **Full Control**: Write exactly the instructions your agent needs, no black box abstractions
2. **Testing and Evals**: Build tests and evals for your prompts just like you would for any other code
3. **Iteration**: Quickly modify prompts based on real-world performance
4. **Transparency**: Know exactly what instructions your agent is working with
5. **Role Hacking**: take advantage of APIs that support nonstandard usage of user/assistant roles - for example, the now-deprecated non-chat flavor of OpenAI "completions" API. This includes some so-called "model gaslighting" techniques
Remember: Your prompts are the primary interface between your application logic and the LLM.
Having full control over your prompts gives you the flexibility and prompt control you need for production-grade agents.
I don't know what's the best prompt, but I know you want the flexibility to be able to try EVERYTHING.
[← Natural Language To Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) | [Own Your Context Window →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
================================================
FILE: content/factor-03-own-your-context-window.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 3. Own your context window
You don't necessarily need to use standard message-based formats for conveying context to an LLM.
> #### At any given point, your input to an LLM in an agent is "here's what's happened so far, what's the next step"
Everything is context engineering. [LLMs are stateless functions](https://thedataexchange.media/baml-revolution-in-ai-engineering/) that turn inputs into outputs. To get the best outputs, you need to give them the best inputs.
Creating great context means:
- The prompt and instructions you give to the model
- Any documents or external data you retrieve (e.g. RAG)
- Any past state, tool calls, results, or other history
- Any past messages or events from related but separate histories/conversations (Memory)
- Instructions about what sorts of structured data to output

### on context engineering
This guide is all about getting as much as possible out of today's models. Notably not mentioned are:
- Changes to model parameters like temperature, top_p, frequency_penalty, presence_penalty, etc.
- Training your own completion or embedding models
- Fine-tuning existing models
Again, I don't know what's the best way to hand context to an LLM, but I know you want the flexibility to be able to try EVERYTHING.
#### Standard vs Custom Context Formats
Most LLM clients use a standard message-based format like this:
```yaml
[
{
"role": "system",
"content": "You are a helpful assistant..."
},
{
"role": "user",
"content": "Can you deploy the backend?"
},
{
"role": "assistant",
"content": null,
"tool_calls": [
{
"id": "1",
"name": "list_git_tags",
"arguments": "{}"
}
]
},
{
"role": "tool",
"name": "list_git_tags",
"content": "{\"tags\": [{\"name\": \"v1.2.3\", \"commit\": \"abc123\", \"date\": \"2024-03-15T10:00:00Z\"}, {\"name\": \"v1.2.2\", \"commit\": \"def456\", \"date\": \"2024-03-14T15:30:00Z\"}, {\"name\": \"v1.2.1\", \"commit\": \"abe033d\", \"date\": \"2024-03-13T09:15:00Z\"}]}",
"tool_call_id": "1"
}
]
```
While this works great for most use cases, if you want to really get THE MOST out of today's LLMs, you need to get your context into the LLM in the most token- and attention-efficient way you can.
As an alternative to the standard message-based format, you can build your own context format that's optimized for your use case. For example, you can use custom objects and pack/spread them into one or more user, system, assistant, or tool messages as makes sense.
Here's an example of putting the whole context window into a single user message:
```yaml
[
{
"role": "system",
"content": "You are a helpful assistant..."
},
{
"role": "user",
"content": |
        Here's everything that happened so far:

        <slack_message>
            From: @alex
            Channel: #deployments
            Text: Can you deploy the backend?
        </slack_message>

        <list_git_tags>
            intent: "list_git_tags"
        </list_git_tags>

        <list_git_tags_result>
            tags:
              - name: "v1.2.3"
                commit: "abc123"
                date: "2024-03-15T10:00:00Z"
              - name: "v1.2.2"
                commit: "def456"
                date: "2024-03-14T15:30:00Z"
              - name: "v1.2.1"
                commit: "ghi789"
                date: "2024-03-13T09:15:00Z"
        </list_git_tags_result>

        what's the next step?
}
]
```
The model may infer that you're asking it `what's the next step` by the tool schemas you supply, but it never hurts to roll it into your prompt template.
### code example
We can build this with something like:
```python
class Thread:
    """Holds the list of events that make up one agent thread."""
    events: List[Event]


class Event:
    """One typed entry in a thread, pairing an event type with its payload."""
    # could just use string, or could be explicit - up to you
    type: Literal["list_git_tags", "deploy_backend", "deploy_frontend", "request_more_information", "done_for_now", "list_git_tags_result", "deploy_backend_result", "deploy_frontend_result", "request_more_information_result", "done_for_now_result", "error"]
    # parenthesized so the union can span several lines; plain-string payloads use `str`
    data: (
        ListGitTags | DeployBackend | DeployFrontend | RequestMoreInformation
        | ListGitTagsResult | DeployBackendResult | DeployFrontendResult
        | RequestMoreInformationResult | str
    )


def event_to_prompt(event: Event) -> str:
    """Render a single event as an XML-style element named after its type.

    String payloads are emitted verbatim; any other payload is passed
    through `stringifyToYaml` first.
    """
    data = event.data if isinstance(event.data, str) \
        else stringifyToYaml(event.data)
    # the closing tag needs its leading "</" so the element is well-formed
    return f"<{event.type}>\n{data}\n</{event.type}>"


def thread_to_prompt(thread: Thread) -> str:
    """Render every event in the thread, separated by blank lines."""
    return '\n\n'.join(event_to_prompt(event) for event in thread.events)
```
#### Example Context Windows
Here's how context windows might look with this approach:
**Initial Slack Request:**
```xml
<slack_message>
    From: @alex
    Channel: #deployments
    Text: Can you deploy the latest backend to production?
</slack_message>
```
**After Listing Git Tags:**
```xml
<slack_message>
    From: @alex
    Channel: #deployments
    Text: Can you deploy the latest backend to production?
    Thread: []
</slack_message>
<list_git_tags>
    intent: "list_git_tags"
</list_git_tags>
<list_git_tags_result>
    tags:
      - name: "v1.2.3"
        commit: "abc123"
        date: "2024-03-15T10:00:00Z"
      - name: "v1.2.2"
        commit: "def456"
        date: "2024-03-14T15:30:00Z"
      - name: "v1.2.1"
        commit: "ghi789"
        date: "2024-03-13T09:15:00Z"
</list_git_tags_result>
```
**After Error and Recovery:**
```xml
<slack_message>
    From: @alex
    Channel: #deployments
    Text: Can you deploy the latest backend to production?
    Thread: []
</slack_message>
<deploy_backend>
    intent: "deploy_backend"
    tag: "v1.2.3"
    environment: "production"
</deploy_backend>
<error>
    error running deploy_backend: Failed to connect to deployment service
</error>
<request_more_information>
    intent: "request_more_information_from_human"
    question: "I had trouble connecting to the deployment service, can you provide more details and/or check on the status of the service?"
</request_more_information>
<human_response>
    data:
      response: "I'm not sure what's going on, can you check on the status of the latest workflow?"
</human_response>
```
From here your next step might be:
```python
nextStep = await determine_next_step(thread_to_prompt(thread))
```
```python
{
"intent": "get_workflow_status",
"workflow_name": "tag_push_prod.yaml",
}
```
The XML-style format is just one example - the point is you can build your own format that makes sense for your application. You'll get better quality if you have the flexibility to experiment with different context structures and what you store vs. what you pass to the LLM.
Key benefits of owning your context window:
1. **Information Density**: Structure information in ways that maximize the LLM's understanding
2. **Error Handling**: Include error information in a format that helps the LLM recover. Consider hiding errors and failed calls from the context window once they are resolved.
3. **Safety**: Control what information gets passed to the LLM, filtering out sensitive data
4. **Flexibility**: Adapt the format as you learn what works best for your use case
5. **Token Efficiency**: Optimize context format for token efficiency and LLM understanding
Context includes: prompts, instructions, RAG documents, history, tool calls, memory
Remember: The context window is your primary interface with the LLM. Taking control of how you structure and present information can dramatically improve your agent's performance.
Example - information density - same message, fewer tokens:

### Don't take it from me
About 2 months after 12-factor agents was published, context engineering started to become a pretty popular term.
There's also a quite good [Context Engineering Cheat Sheet](https://x.com/lenadroid/status/1943685060785524824) from [@lenadroid](https://x.com/lenadroid) from July 2025.
Recurring theme here: I don't know what's the best approach, but I know you want the flexibility to be able to try EVERYTHING.
[← Own Your Prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) | [Tools Are Structured Outputs →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md)
================================================
FILE: content/factor-04-tools-are-structured-outputs.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 4. Tools are just structured outputs
Tools don't need to be complex. At their core, they're just structured output from your LLM that triggers deterministic code.

For example, let's say you have two tools `CreateIssue` and `SearchIssues`. To ask an LLM to "use one of several tools" is just to ask it to output JSON we can parse into an object representing those tools.
```python
class Issue:
    """Fields describing an issue to be created."""
    title: str
    description: str
    team_id: str
    assignee_id: str


class CreateIssue:
    """Structured output asking deterministic code to create an issue."""
    # Literal pins the discriminator value; a bare string here would be a forward reference
    intent: Literal["create_issue"]
    issue: Issue


class SearchIssues:
    """Structured output asking deterministic code to search issues."""
    intent: Literal["search_issues"]
    query: str
    what_youre_looking_for: str
```
The pattern is simple:
1. LLM outputs structured JSON
2. Deterministic code executes the appropriate action (like calling an external API)
3. Results are captured and fed back into the context
This creates a clean separation between the LLM's decision-making and your application's actions. The LLM decides what to do, but your code controls how it's done. Just because an LLM "called a tool" doesn't mean you have to go execute a specific corresponding function in the same way every time.
If you recall our switch statement from above
```python
if nextStep.intent == 'create_payment_link':
stripe.paymentlinks.create(nextStep.parameters)
return # or whatever you want, see below
elif nextStep.intent == 'wait_for_a_while':
# do something monadic idk
else: #... the model didn't call a tool we know about
# do something else
```
**Note**: there has been a lot said about the benefits of "plain prompting" vs. "tool calling" vs. "JSON mode" and the performance tradeoffs of each. We'll link some resources to that stuff soon, but not gonna get into it here. See [Prompting vs JSON Mode vs Function Calling vs Constrained Generation vs SAP](https://www.boundaryml.com/blog/schema-aligned-parsing), [When should I use function calling, structured outputs, or JSON mode?](https://www.vellum.ai/blog/when-should-i-use-function-calling-structured-outputs-or-json-mode#:~:text=We%20don%27t%20recommend%20using%20JSON,always%20use%20Structured%20Outputs%20instead) and [OpenAI JSON vs Function Calling](https://docs.llamaindex.ai/en/stable/examples/llm/openai_json_vs_function_calling/).
The "next step" might not be as atomic as just "run a pure function and return the result". You unlock a lot of flexibility when you think of "tool calls" as just a model outputting JSON describing what deterministic code should do. Put this together with [factor 8 own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md).
[← Own Your Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) | [Unify Execution State →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md)
================================================
FILE: content/factor-05-unify-execution-state.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 5. Unify execution state and business state
Even outside the AI world, many infrastructure systems try to separate "execution state" from "business state". For AI apps, this might involve complex abstractions to track things like current step, next step, waiting status, retry counts, etc. This separation creates complexity that may be worthwhile, but may be overkill for your use case.
As always, it's up to you to decide what's right for your application. But don't think you *have* to manage them separately.
More clearly:
- **Execution state**: current step, next step, waiting status, retry counts, etc.
- **Business state**: What's happened in the agent workflow so far (e.g. list of OpenAI messages, list of tool calls and results, etc.)
If possible, SIMPLIFY - unify these as much as possible.
[](https://github.com/user-attachments/assets/e5a851db-f58f-43d8-8b0c-1926c99fc68d)
GIF Version

In reality, you can engineer your application so that you can infer all execution state from the context window. In many cases, execution state (current step, waiting status, etc.) is just metadata about what has happened so far.
You may have things that can't go in the context window, like session ids, password contexts, etc, but your goal should be to minimize those things. By embracing [factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) you can control what actually goes into the LLM
This approach has several benefits:
1. **Simplicity**: One source of truth for all state
2. **Serialization**: The thread is trivially serializable/deserializable
3. **Debugging**: The entire history is visible in one place
4. **Flexibility**: Easy to add new state by just adding new event types
5. **Recovery**: Can resume from any point by just loading the thread
6. **Forking**: Can fork the thread at any point by copying some subset of the thread into a new context / state ID
7. **Human Interfaces and Observability**: Trivial to convert a thread into a human-readable markdown or a rich Web app UI
[← Tools Are Structured Outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) | [Launch/Pause/Resume →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md)
================================================
FILE: content/factor-06-launch-pause-resume.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 6. Launch/Pause/Resume with simple APIs
Agents are just programs, and we have expectations about how to launch, query, resume, and stop them.
[](https://github.com/user-attachments/assets/feb1a425-cb96-4009-a133-8bd29480f21f)
GIF Version

It should be easy for users, apps, pipelines, and other agents to launch an agent with a simple API.
Agents and their orchestrating deterministic code should be able to pause an agent when a long-running operation is needed.
External triggers like webhooks should enable agents to resume from where they left off without deep integration with the agent orchestrator.
Closely related to [factor 5 - unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) and [factor 8 - own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md), but can be implemented independently.
**Note** - often AI orchestrators will allow for pause and resume, but not between the moment of tool selection and tool execution. See also [factor 7 - contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) and [factor 11 - trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md).
[← Unify Execution State](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) | [Contact Humans With Tools →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)
================================================
FILE: content/factor-07-contact-humans-with-tools.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 7. Contact humans with tool calls
By default, LLM APIs rely on a fundamental HIGH-STAKES token choice: Are we returning plaintext content, or are we returning structured data?

You're putting a lot of weight on that choice of first token, which, in the `the weather in tokyo` case, is
> "the"
but in the `fetch_weather` case, it's some special token to denote the start of a JSON object.
> |JSON>
You might get better results by having the LLM *always* output JSON, and then declare its intent with some natural language tokens like `request_human_input` or `done_for_now` (as opposed to a "proper" tool like `check_weather_in_city`).
Again, you might not get any performance boost from this, but you should experiment, and ensure you're free to try weird stuff to get the best results.
```python
class Options:
    """Presentation options attached to a request for human input."""
    urgency: Literal["low", "medium", "high"]
    format: Literal["free_text", "yes_no", "multiple_choice"]
    choices: List[str]


# Tool definition for human interaction
class RequestHumanInput:
    """Structured output the model emits when it wants input from a human."""
    # Literal pins the discriminator value, matching how Options declares its fields
    intent: Literal["request_human_input"]
    question: str
    context: str
    options: Options
# Example usage in the agent loop
if nextStep.intent == 'request_human_input':
thread.events.append({
type: 'human_input_requested',
data: nextStep
})
thread_id = await save_state(thread)
await notify_human(nextStep, thread_id)
return # Break loop and wait for response to come back with thread ID
else:
# ... other cases
```
Later, you might receive a webhook from a system that handles slack, email, sms, or other events.
```python
@app.post('/webhook')
async def webhook(req: Request):
    """Resume a paused thread when a human response arrives.

    Loads the thread named by `req.body.threadId`, appends the response as
    an event, asks the model for the next step, and hands it to
    `handle_next_step`. Declared `async` because the body awaits.
    """
    thread_id = req.body.threadId
    thread = await load_state(thread_id)
    # Python lists use append (not push); dict keys must be quoted strings
    thread.events.append({
        'type': 'response_from_human',
        'data': req.body
    })
    # ... simplified for brevity, you likely don't want to block the web worker here
    next_step = await determine_next_step(thread_to_prompt(thread))
    thread.events.append(next_step)
    result = await handle_next_step(thread, next_step)
    # todo - loop or break or whatever you want
    return {"status": "ok"}
```
The above includes patterns from [factor 5 - unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md), [factor 8 - own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md), [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md), and [factor 4 - tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md), and several others.
If we were using the XML-y format from [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md), our context window after a few turns might look like this:
```xml
(snipped for brevity)
<slack_message>
    From: @alex
    Channel: #deployments
    Text: Can you deploy backend v1.2.3 to production?
    Thread: []
</slack_message>
<request_human_input>
    intent: "request_human_input"
    question: "Would you like to proceed with deploying v1.2.3 to production?"
    context: "This is a production deployment that will affect live users."
    options: {
        urgency: "high"
        format: "yes_no"
    }
</request_human_input>
<human_response>
    response: "yes please proceed"
    approved: true
    timestamp: "2024-03-15T10:30:00Z"
    user: "alex@company.com"
</human_response>
<deploy_backend>
    intent: "deploy_backend"
    tag: "v1.2.3"
    environment: "production"
</deploy_backend>
<deploy_backend_result>
    status: "success"
    message: "Deployment v1.2.3 to production completed successfully."
    timestamp: "2024-03-15T10:30:00Z"
</deploy_backend_result>
```
Benefits:
1. **Clear Instructions**: Tools for different types of human contact allow for more specificity from the LLM
2. **Inner vs Outer Loop**: Enables agent workflows **outside** of the traditional ChatGPT-style interface, where the control flow and context initialization may be `Agent->Human` rather than `Human->Agent` (think, agents kicked off by a cron or an event)
3. **Multiple Human Access**: Can easily track and coordinate input from different humans through structured events
4. **Multi-Agent**: Simple abstraction can be easily extended to support `Agent->Agent` requests and responses
5. **Durable**: Combined with [factor 6 - launch/pause/resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md), this makes for durable, reliable, and introspectable multiplayer workflows
[More on Outer Loop Agents over here](https://theouterloop.substack.com/p/openais-realtime-api-is-a-step-towards)

Works great with [factor 11 - trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md)
[← Launch/Pause/Resume](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) | [Own Your Control Flow →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md)
================================================
FILE: content/factor-08-own-your-control-flow.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 8. Own your control flow
If you own your control flow, you can do lots of fun things.

Build your own control structures that make sense for your specific use case. Specifically, certain types of tool calls may be reason to break out of the loop and wait for a response from a human or another long-running task like a training pipeline. You may also want to incorporate custom implementation of:
- summarization or caching of tool call results
- LLM-as-judge on structured output
- context window compaction or other [memory management](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)
- logging, tracing, and metrics
- client-side rate limiting
- durable sleep / pause / "wait for event"
The below example shows three possible control flow patterns:
- request_clarification: model asked for more info, break the loop and wait for a response from a human
- fetch_open_issues: model asked for a list of open issues, fetch the issues, append to context window, and pass straight back to the model
- create_issue: model asked to create an issue, this is a high-stakes thing, so break the loop and wait for human approval
```python
async def handle_next_step(thread: Thread):
    """Run the agent loop for `thread` until a step has to wait on a human.

    Each iteration asks `determine_next_step` for the next step and records it
    on `thread.events`. Steps that need a human (clarification, approval) save
    the thread and break out of the loop; steps that can be resolved
    immediately append their result and continue to the next iteration.
    """
    while True:
        next_step = await determine_next_step(thread_to_prompt(thread))

        # inlined for clarity - in reality you could put
        # this in a method, use exceptions for control flow, or whatever you want
        if next_step.intent == 'request_clarification':
            thread.events.append({
                'type': 'request_clarification',
                'data': next_step,
            })

            await send_message_to_human(next_step)
            await db.save_thread(thread)
            # async step - break the loop, we'll get a webhook later
            break
        elif next_step.intent == 'fetch_open_issues':
            thread.events.append({
                'type': 'fetch_open_issues',
                'data': next_step,
            })

            issues = await linear_client.issues()

            thread.events.append({
                'type': 'fetch_open_issues_result',
                'data': issues,
            })
            # sync step - pass the new context to the LLM to determine the NEXT next step
            continue
        elif next_step.intent == 'create_issue':
            thread.events.append({
                'type': 'create_issue',
                'data': next_step,
            })

            await request_human_approval(next_step)
            await db.save_thread(thread)
            # async step - break the loop, we'll get a webhook later
            break
```
This pattern allows you to interrupt and resume your agent's flow as needed, creating more natural conversations and workflows.
**Example** - the number one feature request I have for every AI framework out there is we need to be able to interrupt
a working agent and resume later, ESPECIALLY between the moment of tool **selection** and the moment of tool **invocation**.
Without this level of resumability/granularity, there's no way to review/approve the tool call before it runs, which means
you're forced to either:
1. Pause the task in memory while waiting for the long-running thing to complete (think `while...sleep`) and restart it from the beginning if the process is interrupted
2. Restrict the agent to only low-stakes, low-risk calls like research and summarization
3. Give the agent access to do bigger, more useful things, and just yolo hope it doesn't screw up
You may notice this is closely related to [factor 5 - unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) and [factor 6 - launch/pause/resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md), but can be implemented independently.
[← Contact Humans With Tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) | [Compact Errors →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md)
================================================
FILE: content/factor-09-compact-errors.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 9. Compact Errors into Context Window
This one is a little short but is worth mentioning. One of the benefits of agents is "self-healing" - for short tasks, an LLM might call a tool that fails. Good LLMs have a fairly good chance of reading an error message or stack trace and figuring out what to change in a subsequent tool call.
Most frameworks implement this, but you can do JUST THIS without doing any of the other 11 factors. Here's an example:
```python
# the thread holds the list of events, seeded with the initial message
thread = {"events": [initial_message]}

while True:
    # determine the next step from the events recorded so far
    next_step = await determine_next_step(thread_to_prompt(thread))

    # record the chosen step before running it
    thread["events"].append({
        "type": next_step.intent,
        "data": next_step,
    })
    try:
        result = await handle_next_step(thread, next_step) # our switch statement
    except Exception as e:
        # if we get an error, we can add it to the context window and try again
        thread["events"].append({
            "type": 'error',
            "data": format_error(e),
        })
        # loop, or do whatever else here to try to recover
```
You may want to implement an errorCounter for a specific tool call, to limit to ~3 attempts of a single tool, or whatever other logic makes sense for your use case.
```python
consecutive_errors = 0
while True:
# ... existing code ...
try:
result = await handle_next_step(thread, next_step)
thread["events"].append({
"type": next_step.intent + '_result',
data: result,
})
# success! reset the error counter
consecutive_errors = 0
except Exception as e:
consecutive_errors += 1
if consecutive_errors < 3:
# do the loop and try again
thread["events"].append({
"type": 'error',
"data": format_error(e),
})
else:
# break the loop, reset parts of the context window, escalate to a human, or whatever else you want to do
break
}
}
```
Hitting some consecutive-error-threshold might be a great place to [escalate to a human](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md), whether by model decision or via deterministic takeover of the control flow.
[](https://github.com/user-attachments/assets/cd7ed814-8309-4baf-81a5-9502f91d4043)
[GIF Version](https://github.com/humanlayer/12-factor-agents/blob/main/img/195-factor-09-errors.gif)

Benefits:
1. **Self-Healing**: The LLM can read the error message and figure out what to change in a subsequent tool call
2. **Durable**: The agent can continue to run even if one tool call fails
I'm sure you will find that if you do this TOO much, your agent will start to spin out and might repeat the same error over and over again.
That's where [factor 8 - own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) and [factor 3 - own your context building](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) come in - you don't need to just put the raw error back on, you can completely restructure how it's represented, remove previous events from the context window, or whatever deterministic thing you find works to get an agent back on track.
But the number one way to prevent error spin-outs is to embrace [factor 10 - small, focused agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md).
[← Own Your Control Flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) | [Small Focused Agents →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md)
================================================
FILE: content/factor-1-natural-language-to-tool-calls.md
================================================
[Moved to factor-01-natural-language-to-tool-calls.md](./factor-01-natural-language-to-tool-calls.md)
================================================
FILE: content/factor-10-small-focused-agents.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 10. Small, Focused Agents
Rather than building monolithic agents that try to do everything, build small, focused agents that do one thing well. Agents are just one building block in a larger, mostly deterministic system.

The key insight here is about LLM limitations: the bigger and more complex a task is, the more steps it will take, which means a longer context window. As context grows, LLMs are more likely to get lost or lose focus. By keeping agents focused on specific domains with 3-10, maybe 20 steps max, we keep context windows manageable and LLM performance high.
> #### As context grows, LLMs are more likely to get lost or lose focus
Benefits of small, focused agents:
1. **Manageable Context**: Smaller context windows mean better LLM performance
2. **Clear Responsibilities**: Each agent has a well-defined scope and purpose
3. **Better Reliability**: Less chance of getting lost in complex workflows
4. **Easier Testing**: Simpler to test and validate specific functionality
5. **Improved Debugging**: Easier to identify and fix issues when they occur
### What if LLMs get smarter?
Do we still need this if LLMs get smart enough to handle 100-step+ workflows?
tl;dr yes. As agents and LLMs improve, they **might** naturally expand to be able to handle longer context windows. This means handling MORE of a larger DAG. This small, focused approach ensures you can get results TODAY, while preparing you to slowly expand agent scope as LLM context windows become more reliable. (If you've refactored large deterministic code bases before, you may be nodding your head right now).
[](https://github.com/user-attachments/assets/0cd3f52c-046e-4d5e-bab4-57657157c82f
)
GIF Version

Being intentional about size/scope of agents, and only growing in ways that allow you to maintain quality, is key here. As the [team that built NotebookLM put it](https://open.substack.com/pub/swyx/p/notebooklm?selection=08e1187c-cfee-4c63-93c9-71216640a5f8&utm_campaign=post-share-selection&utm_medium=web):
> I feel like consistently, the most magical moments out of AI building come about for me when I'm really, really, really just close to the edge of the model capability
Regardless of where that boundary is, if you can find that boundary and get it right consistently, you'll be building magical experiences. There are many moats to be built here, but as usual, they take some engineering rigor.
[← Compact Errors](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md) | [Trigger From Anywhere →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md)
================================================
FILE: content/factor-11-trigger-from-anywhere.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 11. Trigger from anywhere, meet users where they are
If you're waiting for the [humanlayer](https://humanlayer.dev) pitch, you made it. If you're doing [factor 6 - launch/pause/resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) and [factor 7 - contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md), you're ready to incorporate this factor.

Enable users to trigger agents from slack, email, sms, or whatever other channel they want. Enable agents to respond via the same channels.
Benefits:
- **Meet users where they are**: This helps you build AI applications that feel like real humans, or at the very least, digital coworkers
- **Outer Loop Agents**: Enable agents to be triggered by non-humans, e.g. events, crons, outages, whatever else. They may work for 5, 20, 90 minutes, but when they get to a critical point, they can contact a human for help, feedback, or approval
- **High Stakes Tools**: If you're able to quickly loop in a variety of humans, you can give agents access to higher stakes operations like sending external emails, updating production data and more. Maintaining clear standards gets you auditability and confidence in agents that [perform bigger better things](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md#what-if-llms-get-smarter)
[← Small Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md) | [Stateless Reducer →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md)
================================================
FILE: content/factor-12-stateless-reducer.md
================================================
[← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md)
### 12. Make your agent a stateless reducer
Okay so we're over 1000 lines of markdown at this point. This one is mostly just for fun.


[← Trigger From Anywhere](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) | [Appendix - Pre-Fetch Context →](https://github.com/humanlayer/12-factor-agents/blob/main/content/appendix-13-pre-fetch.md)
================================================
FILE: content/factor-2-own-your-prompts.md
================================================
[Moved to factor-02-own-your-prompts.md](./factor-02-own-your-prompts.md)
================================================
FILE: content/factor-3-own-your-context-window.md
================================================
[Moved to factor-03-own-your-context-window.md](./factor-03-own-your-context-window.md)
================================================
FILE: content/factor-4-tools-are-structured-outputs.md
================================================
[Moved to factor-04-tools-are-structured-outputs.md](./factor-04-tools-are-structured-outputs.md)
================================================
FILE: content/factor-5-unify-execution-state.md
================================================
[Moved to factor-05-unify-execution-state.md](./factor-05-unify-execution-state.md)
================================================
FILE: content/factor-6-launch-pause-resume.md
================================================
[Moved to factor-06-launch-pause-resume.md](./factor-06-launch-pause-resume.md)
================================================
FILE: content/factor-7-contact-humans-with-tools.md
================================================
[Moved to factor-07-contact-humans-with-tools.md](./factor-07-contact-humans-with-tools.md)
================================================
FILE: content/factor-8-own-your-control-flow.md
================================================
[Moved to factor-08-own-your-control-flow.md](./factor-08-own-your-control-flow.md)
================================================
FILE: content/factor-9-compact-errors.md
================================================
[Moved to factor-09-compact-errors.md](./factor-09-compact-errors.md)
================================================
FILE: drafts/a2h-spec.md
================================================
# A2H - The Agent-to-Human Protocol
## Overview
A2H is a service that allows an agent to request human interaction
## Why another protocol?
MCP and A2A are not enough
## Shoulds
- Clients should respect A2H_BASE_URL and A2H_API_KEY environment variables if set, to allow for simple oauth2-based authentication to REST services.
## Core Protocol
### Scopes
The A2H protocol supports two scopes:
- The agent side, APIs consumed by an agent to request human interaction
- The (Optional) admin side, APIs consumed by an admin or web application to manage humans and their contact channels
This separation allows for agents to query and find humans to contact, without exposing the human's contact details to the agent. It is the responsibility of the A2H provider to relay agent requests to the appropriate human via that human's preferred contact channel(s).
### Objects
```
apiVersion: proto.a2h.dev/v1alpha1
kind: Message
metadata:
uid: "123"
spec: # spec sent by agent
message: "" # message from the agent
response_schema:
# optional, json schema for the response,
channel_id:
status: # status resolved by a2h server
humanMessage: "" # message from the human
response:
# optional, matches spec schema
```
```
apiVersion: proto.a2h.dev/v1alpha1
kind: NewConversation
metadata:
uid: "abc"
spec: # spec sent by a2h server
message: "" # message from the agent
channel_id: "123" # channel id to use for future conversations
response_schema:
# optional, json schema for the response,
```
#### HumanContact
```json
{
"run_id": "run_123",
"call_id": "call_456",
"spec": {
"msg": "I've tried using the tool to refund the customer but its returning a 500 error. Can you help?",
"channel": {
"slack": {
"channel_or_user_id": "U1234567890",
"context_about_channel_or_user": "Support team lead"
}
}
}
}
```
A HumanContact represents a request for human interaction. It contains:
- `run_id` (string): Unique identifier for the run
- `call_id` (string): Unique identifier for the contact request
- `spec` (HumanContactSpec): The specification for the contact request
- `status` (HumanContactStatus, optional): The current status of the contact request
The HumanContactSpec contains:
- `msg` (string): The message to send to the human
- `subject` (string, optional): Subject of the contact request
- `channel` (ContactChannel, optional): The channel to use for contact
- `response_options` (ResponseOption[], optional): Available response options
- `state` (object, optional): Additional state information
The HumanContactStatus contains:
- `requested_at` (datetime, optional): When the contact was requested
- `responded_at` (datetime, optional): When the human responded
- `response` (string, optional): The human's response
- `response_option_name` (string, optional): Name of the selected response option
- `slack_message_ts` (string, optional): Slack message timestamp if applicable
- `failed_validation_details` (object, optional): Details if validation failed
#### FunctionCall
Example:
```json
{
"run_id": "run_789",
"call_id": "call_101",
"spec": {
"fn": "process_payment",
"kwargs": {
"amount": 100.00,
"currency": "USD",
"recipient": "merchant_123"
},
"channel": {
"email": {
"address": "ap@example.com"
}
}
},
"status": {
"requested_at": "2024-03-20T11:00:00Z",
"responded_at": "2024-03-20T11:02:00Z",
"approved": true,
"comment": "Payment looks good, approved",
"user_info": {
"name": "John Doe",
"role": "Finance Manager"
},
"slack_message_ts": "1234567890.123457"
}
}
```
A FunctionCall represents a request for human approval of a function execution. It contains:
- `run_id` (string): Unique identifier for the run
- `call_id` (string): Unique identifier for the function call
- `spec` (FunctionCallSpec): The specification for the function call
- `status` (FunctionCallStatus, optional): The current status of the function call
The FunctionCallSpec contains:
- `fn` (string): The function to be called
- `kwargs` (object): The keyword arguments for the function
- `channel` (ContactChannel, optional): The channel to use for contact
- `reject_options` (ResponseOption[], optional): Available rejection options
- `state` (object, optional): Additional state information
The FunctionCallStatus contains:
- `requested_at` (datetime, optional): When the approval was requested
- `responded_at` (datetime, optional): When the human responded
- `approved` (boolean, optional): Whether the function call was approved
- `comment` (string, optional): Any comment from the human
- `user_info` (object, optional): Information about the responding user
- `slack_context` (object, optional): Slack-specific context
- `reject_option_name` (string, optional): Name of the selected rejection option
- `slack_message_ts` (string, optional): Slack message timestamp if applicable
- `failed_validation_details` (object, optional): Details if validation failed
#### ContactChannel
Example:
```json
{
"slack": {
"channel_or_user_id": "U1234567890",
"context_about_channel_or_user": "Support team lead",
"allowed_responder_ids": ["U1234567890", "U2345678901"],
"experimental_slack_blocks": true,
"thread_ts": "1234567890.123456"
}
}
```
or
```json
{
"email": {
"address": "ap@example.com",
"context_about_user": "Accounts Payable",
"in_reply_to_message_id": "1234567890",
"references_message_id": "1234567890",
"template": "..."
}
}
```
A ContactChannel represents a channel through which a human can be contacted. The protocol supports several channel types:
1. SlackContactChannel:
- `channel_or_user_id` (string): The Slack channel or user ID
- `context_about_channel_or_user` (string, optional): Additional context
- `bot_token` (string, optional): Bot token for authentication
- `allowed_responder_ids` (string[], optional): IDs of allowed responders
- `experimental_slack_blocks` (boolean, optional): Enable experimental blocks
- `thread_ts` (string, optional): Thread timestamp for threaded messages
2. SMSContactChannel:
- `phone_number` (string): The phone number to contact
- `context_about_user` (string, optional): Additional context about the user
3. WhatsAppContactChannel:
- `phone_number` (string): The phone number to contact
- `context_about_user` (string, optional): Additional context about the user
#### Human (Agent Side)
From the agent's perspective, a human is an object that has a name and description.
#### Human (Admin Side)
From the admin's perspective, a human is an object that has a name, description, and a list of prioritized contact channels, with details
### Agent Endpoints
#### POST /human_contacts
#### GET /human_contacts/:call_id
#### POST /function_calls
#### GET /function_calls/:call_id
## Extended Protocol
- Admin Humans
- Agent Humans Get
- Agent Humans Search
- Agent Channels List
- Agent Channels validate
### Objects
#### Human (Agent Side)
From the agent's perspective, a human is an object that has a name and description.
#### Human (Admin Side)
From the admin's perspective, a human is an object that has a name, description, and a list of prioritized contact channels, with details
### Agent Endpoints
#### GET /channels
return what contact channels are available and their supported fields
example response:
```json
{
"channels": {
"slack": {
"channelOrUserId": {
"type": "string",
"description": "The Slack channel or user ID to send messages to"
},
"contextAboutChannelOrUser": {
"type": "string",
"description": "Additional context about the Slack channel or user"
}
},
"email": {
"address": {
"type": "string",
"description": "Email address to send messages to"
},
"contextAboutUser": {
"type": "string",
"description": "Additional context about the email recipient"
},
"inReplyToMessageId": {
"type": "string",
"description": "The message ID of the email to reply to"
},
"referencesMessageId": {
"type": "string",
"description": "The message ID of the email to reference"
}
}
}
}
```
#### GET /humans
return a list of humans that are available to interact with
example response:
```json
{
"humans": [
{
"id": "654",
"name": "Jane Doe",
"description": "Jane Doe is a human who knows about technology and entrepreneurship"
},
{
"id": "123",
"name": "John Doe",
"description": "John Doe is a human who knows about sales and marketing"
}
]
}
```
#### GET /humans/search?q=
search for humans by name or description
example response:
```json
{
"humans": [
{
"id": "654",
"name": "Jane Doe",
"description": "Jane Doe is a human who knows about technology and entrepreneurship"
}
]
}
```
### Administrative Endpoints
#### POST /humans
Enroll a new human for agent contact
example request:
```json
{
"name": "John Doe",
"description": "John Doe is a human who knows about sales and marketing",
"prioritizedContactChannels": [
{
"slack": {
"channelOrUserId": "U1234567890"
}
},
{
"email": {
"address": "john.doe@example.com"
}
}
]
}
```
#### GET /humans/:id
Get a human by id
example response:
```json
================================================
FILE: drafts/ah2-openapi.json
================================================
================================================
FILE: hack/contributors_markdown/.python-version
================================================
3.13
================================================
FILE: hack/contributors_markdown/README.md
================================================
================================================
FILE: hack/contributors_markdown/contributors_markdown.py
================================================
#!/usr/bin/env python3
"""
Generate a Markdown grid of contributor avatars for a GitHub repository.
Usage:
python contributors_markdown.py --repo owner/name --token GH_TOKEN [--cols 7] [--image_size 80] [--output FILE]
Arguments:
--repo GitHub repository in "owner/name" form (e.g. "octocat/Hello-World")
--token Personal access token with `public_repo` scope (or `repo` for private).
Can also be provided via the GITHUB_TOKEN environment variable.
--cols Number of avatars per row in the generated grid (default 7).
--image_size Pixel width for avatars (GitHub automatically resizes; default 80).
--output File to write the Markdown grid into (default: stdout, use '-' for stdout).
The generated file contains a Markdown table‑less grid of linked avatars that can
be embedded in README.md or any other Markdown document.
"""
from __future__ import annotations
import argparse
import os
import sys
import textwrap
from typing import List, Dict
import requests
API_URL_TEMPLATE = "https://api.github.com/repos/{owner}/{repo}/contributors"
def fetch_contributors(owner: str, repo: str, token: str | None, per_page: int = 100) -> List[Dict]:
    """Collect every contributor object for ``owner/repo`` from the GitHub REST API.

    Pages are requested one at a time (``per_page`` items each, 10 second
    timeout) and concatenated in the order received. Paging stops at the first
    page that is empty or holds fewer than ``per_page`` items.

    Args:
        owner: Repository owner.
        repo: Repository name.
        token: Optional token; when truthy it is sent as a Bearer credential.
        per_page: Page size requested from the API.

    Returns:
        The decoded JSON items of all fetched pages, as a single list.

    Raises:
        requests.HTTPError: via ``raise_for_status()`` on an error response.
    """
    request_headers = {"Accept": "application/vnd.github+json"}
    if token:
        request_headers["Authorization"] = f"Bearer {token}"

    endpoint = API_URL_TEMPLATE.format(owner=owner, repo=repo)
    collected: List[Dict] = []
    page_number = 1
    more_pages = True
    while more_pages:
        reply = requests.get(
            f"{endpoint}?per_page={per_page}&page={page_number}",
            headers=request_headers,
            timeout=10,
        )
        reply.raise_for_status()
        page_items = reply.json()
        if page_items:
            collected.extend(page_items)
        # an empty or short page means there is nothing further to request
        more_pages = bool(page_items) and len(page_items) >= per_page
        page_number += 1
    return collected
def build_markdown(contributors: List[Dict], cols: int = 7, image_size: int = 80) -> str:
    """Return a Markdown fragment containing a grid of linked avatar images.

    Args:
        contributors: Contributor objects; each must provide the keys
            ``login``, ``avatar_url`` and ``html_url``.
        cols: Number of avatars per row. A value below 1 puts every avatar on
            a single row.
        image_size: Avatar size in pixels, used both as the ``s`` query
            parameter on the avatar URL and as the rendered image width.

    Returns:
        One line per row (cells separated by a space), rows separated by a
        blank line. An empty contributor list yields an empty string.
    """
    cells: List[str] = []
    for contributor in contributors:
        login = contributor["login"]
        avatar_url = contributor["avatar_url"]
        profile = contributor["html_url"]
        # append the size as a query parameter, starting the query string
        # when the avatar URL does not already have one
        separator = "&" if "?" in avatar_url else "?"
        avatar = f"{avatar_url}{separator}s={image_size}"
        # each cell is the avatar image wrapped in a link to the profile page
        cells.append(f'[<img src="{avatar}" width="{image_size}" alt="{login}" />]({profile})')

    # a non-positive column count never completes a row, so everything
    # ends up on one line
    per_row = cols if cols > 0 else max(len(cells), 1)
    rows = [" ".join(cells[start:start + per_row]) for start in range(0, len(cells), per_row)]
    return "\n\n".join(rows)
def main() -> None:
    """Command-line entry point: parse options, fetch contributors, emit the grid.

    The token comes from ``--token`` or, failing that, the ``GITHUB_TOKEN``
    environment variable. Argument problems are reported through the parser's
    ``error()``, and an empty contributor list ends the program via
    ``sys.exit`` with a message. The header plus grid is written to stdout
    when ``--output`` is ``-``; otherwise it is written to the named file and
    a summary line is printed to stderr.
    """
    cli = argparse.ArgumentParser(description="Generate a Markdown grid of contributor avatars")
    cli.add_argument("--repo", required=True, help="GitHub repo in owner/name form")
    cli.add_argument("--token", help="GitHub Personal Access Token (or set GITHUB_TOKEN env)")
    cli.add_argument("--cols", type=int, default=7, help="Number of avatars per row (default 7)")
    cli.add_argument("--image_size", type=int, default=80, help="Avatar size in px (default 80)")
    cli.add_argument("--output", "-o", default="-", help="Output file (default: stdout, use '-' for stdout)")
    opts = cli.parse_args()

    gh_token = opts.token or os.getenv("GITHUB_TOKEN")
    if not gh_token:
        cli.error("A GitHub token must be supplied via --token or GITHUB_TOKEN env var.")
    if "/" not in opts.repo:
        cli.error("--repo must be in 'owner/name' form")

    # split on the first slash only, so the name part keeps any further slashes
    owner, _, repo = opts.repo.partition("/")

    people = fetch_contributors(owner, repo, gh_token)
    if not people:
        sys.exit("No contributors found. Is the repository correct and does the token have access?")

    grid = build_markdown(people, cols=opts.cols, image_size=opts.image_size)
    header = textwrap.dedent(
        f"""
        ## Contributors
        Thanks to these wonderful people:\n
        """
    )
    document = f"{header}{grid}\n"

    if opts.output == "-":
        sys.stdout.write(document)
        return

    with open(opts.output, "w", encoding="utf-8") as out_file:
        out_file.write(document)
    print(f"Wrote {len(people)} contributors to {opts.output}", file=sys.stderr)


# run the CLI only when executed as a script
if __name__ == "__main__":
    main()
================================================
FILE: hack/contributors_markdown/pyproject.toml
================================================
[project]
name = "contributors-markdown"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"requests>=2.32.3",
]
================================================
FILE: packages/create-12-factor-agent/template/.gitignore
================================================
baml_client/
node_modules/
.threads/
================================================
FILE: packages/create-12-factor-agent/template/README.md
================================================
# Chapter 0 - Hello World
Let's start with a basic TypeScript setup and a hello world program.
This guide is written in TypeScript (yes, a python version is coming soon)
There are many checkpoints between every file edit in the workshop steps,
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of nodejs and npm installed
You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine
brew install node@20
You should see the node version
node --version
Copy initial package.json
cp ./walkthrough/00-package.json package.json
Install dependencies
npm install
Copy tsconfig.json
cp ./walkthrough/00-tsconfig.json tsconfig.json
add .gitignore
cp ./walkthrough/00-.gitignore .gitignore
Create src folder
mkdir -p src
Add a simple hello world index.ts
cp ./walkthrough/00-index.ts src/index.ts
Run it to verify
npx tsx src/index.ts
You should see:
hello, world!
# Chapter 1 - CLI and Agent Loop
Now let's add BAML and create our first agent with a CLI interface.
First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.
npm install @boundaryml/baml
Initialize BAML
npx baml-cli init
Remove default resume.baml
rm baml_src/resume.baml
Add our starter agent, a single baml prompt that we'll build on
cp ./walkthrough/01-agent.baml baml_src/agent.baml
Generate BAML client code
npx baml-cli generate
Enable BAML logging for this section
export BAML_LOG=debug
Add the CLI interface
cp ./walkthrough/01-cli.ts src/cli.ts
Update index.ts to use the CLI
cp ./walkthrough/01-index.ts src/index.ts
Add the agent implementation
cp ./walkthrough/01-agent.ts src/agent.ts
The BAML code is configured to use BASETEN_API_KEY by default
To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co),
and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/).
```rust
function DetermineNextStep(thread: string) -> DoneForNow {
client Qwen3
// ...
```
If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key.
If you want to try swapping out the model, you can change the `client` line.
[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)
For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini)
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.
For example, to use openai with an OPENAI_API_KEY, you can do:
client "openai/gpt-4o"
Set your env vars
export BASETEN_API_KEY=...
export BASETEN_BASE_URL=...
Try it out
npx tsx src/index.ts hello
you should see a familiar response from the model
{
intent: 'done_for_now',
message: 'Hello! How can I assist you today?'
}
# Chapter 2 - Add Calculator Tools
Let's add some calculator tools to our agent.
Let's start by adding a tool definition for the calculator
These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.
cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps
cp ./walkthrough/02-agent.baml baml_src/agent.baml
Generate updated BAML client
npx baml-cli generate
Try out the calculator
npx tsx src/index.ts 'can you add 3 and 4'
You should see a tool call to the calculator
{
intent: 'add',
a: 3,
b: 4
}
# Chapter 3 - Process Tool Calls in a Loop
Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.
First, let's update the agent to handle the tool call
cp ./walkthrough/03-agent.ts src/agent.ts
Now, let's try it out
npx tsx src/index.ts 'can you add 3 and 4'
you should see the agent call the tool and then return the result
{
intent: 'done_for_now',
message: 'The sum of 3 and 4 is 7.'
}
For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output
export BAML_LOG=off
Try a multi-step calculation
npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'
you'll notice that tools like multiply and divide are not available
npx tsx src/index.ts 'can you multiply 3 and 4'
next, let's add handlers for the rest of the calculator tools
cp ./walkthrough/03b-agent.ts src/agent.ts
Test subtraction
npx tsx src/index.ts 'can you subtract 3 from 4'
now, let's test the multiplication tool
npx tsx src/index.ts 'can you multiply 3 and 4'
finally, let's test a more complex calculation with multiple operations
npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
congratulations, you've taken your first step into hand-rolling an agent loop.
from here, we're going to start incorporating some more intermediate and advanced
concepts for 12-factor agents.
# Chapter 4 - Add Tests to agent.baml
Let's add some tests to our BAML agent.
to start, leave the baml logs enabled
export BAML_LOG=debug
next, let's add some tests to the agent
We'll start with a simple test that checks the agent's ability to handle
a basic calculation.
cp ./walkthrough/04-agent.baml baml_src/agent.baml
Run the tests
npx baml-cli test
now, let's improve the test with assertions!
Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.
cp ./walkthrough/04b-agent.baml baml_src/agent.baml
Run the tests
npx baml-cli test
as you add more tests, you can disable the logs to keep the output clean.
You may want to turn them on as you iterate on specific tests.
export BAML_LOG=off
now, let's add some more complex test cases,
where we resume from in the middle of an in-progress
agentic context window
cp ./walkthrough/04c-agent.baml baml_src/agent.baml
let's try to run it
npx baml-cli test
# Chapter 5 - Multiple Human Tools
In this section, we'll add support for multiple tools that serve to
contact humans.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
first, let's add a tool that can request clarification from a human
this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.
cp ./walkthrough/05-agent.baml baml_src/agent.baml
next, let's re-generate the client code
NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.
npx baml-cli generate
now, let's update the agent to use the new tool
cp ./walkthrough/05-agent.ts src/agent.ts
next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI
cp ./walkthrough/05-cli.ts src/cli.ts
let's try it out
npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '
next, let's add a test that checks the agent's ability to handle
a clarification request
cp ./walkthrough/05b-agent.baml baml_src/agent.baml
and now we can run the tests again
npx baml-cli test
you'll notice the new test passes, but the hello world test fails
This is because the agent's default behavior is to return "done_for_now"
cp ./walkthrough/05c-agent.baml baml_src/agent.baml
Verify tests pass
npx baml-cli test
# Chapter 6 - Customize Your Prompt with Reasoning
In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.
this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)
there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)
for this section, it will be helpful to leave the baml logs enabled
export BAML_LOG=debug
update the agent prompt to include a reasoning step
cp ./walkthrough/06-agent.baml baml_src/agent.baml
generate the updated client
npx baml-cli generate
now, you can try it out with a simple prompt
npx tsx src/index.ts 'can you multiply 3 and 4'
you should see output from the baml logs showing the reasoning steps
#### optional challenge
add a field to your tool output format that includes the reasoning steps in the output!
# Chapter 7 - Customize Your Context Window
In this section, we'll explore how to customize the context window
of the agent.
this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)
update the agent to pretty-print the Context window for the model
cp ./walkthrough/07-agent.ts src/agent.ts
Test the formatting
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
next, let's update the agent to use XML formatting instead
this is a very popular format for passing data to a model,
among other things, because of the token efficiency of XML.
cp ./walkthrough/07b-agent.ts src/agent.ts
let's try it out
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
lets update our tests to match the new output format
cp ./walkthrough/07c-agent.baml baml_src/agent.baml
check out the updated tests
npx baml-cli test
# Chapter 8 - Adding API Endpoints
Add an Express server to expose the agent via HTTP.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Install Express and types
npm install express && npm install --save-dev @types/express supertest
Add the server implementation
cp ./walkthrough/08-server.ts src/server.ts
Start the server
npx tsx src/server.ts
Test with curl (in another terminal)
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you add 3 and 4"}'
You should get an answer from the agent which includes the
agentic trace, ending in a message like:
{"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}
# Chapter 9 - In-Memory State and Async Clarification
Add state management and async clarification support.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Add some simple in-memory state management for threads
cp ./walkthrough/09-state.ts src/state.ts
update the server to use the state management
* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id
* implement POST /thread/:id/response
cp ./walkthrough/09-server.ts src/server.ts
Start the server
npx tsx src/server.ts
Test clarification flow
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you multiply 3 and xyz"}'
# Chapter 10 - Adding Human Approval
Add support for human approval of operations.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
update the server to handle human approvals
* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong
cp ./walkthrough/10-server.ts src/server.ts
Add a few methods to the agent to handle approvals and responses
cp ./walkthrough/10-agent.ts src/agent.ts
Start the server
npx tsx src/server.ts
Test division with approval
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you divide 3 by 4"}'
You should see:
{
"thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
}
]
}
reject the request with another curl call, changing the thread ID
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'
You should see: the last tool call is now `"intent":"divide","a":3,"b":5`
{
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
},
{
"type": "tool_response",
"data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 5,
"response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
}
}
]
}
now you can approve the operation
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": true}'
you should see the final message includes the tool response and final result!
...
{
"type": "tool_response",
"data": 0.5
},
{
"type": "done_for_now",
"message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!",
"response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
}
# Chapter 11 - Human Approvals over email
in this section, we'll add support for human approvals over email.
This will start a little bit contrived, just to get the concepts down -
We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI
While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Install HumanLayer
npm install humanlayer
Update CLI to send `divide` and `request_more_information` to a human via email
cp ./walkthrough/11-cli.ts src/cli.ts
Run the CLI
npx tsx src/index.ts 'can you divide 4 by 5'
The last line of your program should mention human review step
nextStep { intent: 'divide', a: 4, b: 5 }
HumanLayer: Requested human approval from HumanLayer cloud
go ahead and respond to the email with some feedback:

you should get another email with an updated attempt based on your feedback!
You can go ahead and approve this one:

and your final output will look like
nextStep {
intent: 'done_for_now',
message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
}
The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!
lets implement the `request_more_information` flow as well
cp ./walkthrough/11b-cli.ts src/cli.ts
let's test the `request_more_information` flow by asking for a calculation
with garbled input:
npx tsx src/index.ts 'can you multiply 4 and xyz'
You should get an email with a request for clarification
Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?
you can respond with something like
use 8 instead of xyz
you should see a final result on the CLI like
I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.
as a final step, lets explore using a custom html template for the email
cp ./walkthrough/11c-cli.ts src/cli.ts
first try with divide:
npx tsx src/index.ts 'can you divide 4 by 5'
you should see a slightly different email with the custom template

feel free to run with the flow and then you can try updating the template to your liking
(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)
try triggering "request_more_information" as well!
thats it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval
# Chapter XX - HumanLayer Webhook Integration
the previous sections used the humanlayer SDK in "synchronous mode" - that
means every time we wait for human approval, we sit in a loop
polling until the human response is received.
That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results.
add code to initialize humanlayer in the server
cp ./walkthrough/12-1-server-init.ts src/server.ts
next, lets update the /thread endpoint to
1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls
Update the server to be able to handle request_clarification responses
- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response
cp ./walkthrough/12a-server.ts src/server.ts
Start the server in another terminal
npx tsx src/server.ts
now that the server is running, send a payload to the '/thread' endpoint
__ do the response step
__ now handle approvals for divide
__ now also handle done_for_now
================================================
FILE: packages/create-12-factor-agent/template/baml_src/agent.baml
================================================
// Tool schemas the model can choose from as its "next step".
// Each class is discriminated by its literal `intent` value.

// Human-facing tool: ask the user for more information.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Human-facing tool: hand control back to the user with a summary message.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Human-facing tool: send `message` to a manager as an approval request.
class RequestApprovalFromManager {
intent "request_approval_from_manager"
message string
}
// Customer-support tool. The description instructs the model to request
// manager approval before selecting it.
class ProcessRefund {
intent "process_refund" @description("you can process a refund for a customer, always request approval from the manager before processing a refund")
order_id string
amount int | float
reason string
}
// Unions grouping the tools by audience; DetermineNextStep returns one member.
type HumanTools = ClarificationRequest | DoneForNow | RequestApprovalFromManager
// AddTool, SubtractTool, MultiplyTool and DivideTool are declared in tool_calculator.baml.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
type CustomerSupportTools = ProcessRefund
// Asks the model for the next step of the agent loop.
//   thread: the serialized conversation/event history, as a single string.
//   returns: exactly one tool from HumanTools, CalculatorTools or CustomerSupportTools.
// The prompt has a system turn and a user turn; `ctx.output_format` is BAML's
// placeholder for the output-schema instructions derived from the return type.
// The trailing "Always think..." section asks the model to write reasoning
// bullets before emitting the structured output.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools | CustomerSupportTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// A bare greeting carries no task, so the expected next step is a clarification request.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
hello!
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// A single well-formed operation should map directly to the matching calculator tool.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4?
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Resumes from a thread in which all three operations have already run;
// the model should finish and mention the final value (18) in its message.
// NOTE(review): the thread body lists bare arguments/results with no event
// tags around them - confirm this matches the serialized thread format.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
a: 3
b: 4
12
a: 12
b: 2
6
a: 6
b: 12
18
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// A garbled operand should trigger a clarification request rather than a tool call.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and fe1iiaff10
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Resumes after the human answered a clarification request: the garbled
// operand was replaced with 12, so the model should now call multiply(3, 12).
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
can you multiply 3 and FD*(#F&& ?
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
lets try 12 instead
"#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // Assertion labels name the field they check, so a failure report points
  // at the right operand (the labels were previously swapped).
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
================================================
FILE: packages/create-12-factor-agent/template/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// Named LLM clients. API keys are read from environment variables.

// OpenAI gpt-4o, no retry policy.
client CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
// OpenAI gpt-4o-mini, using the Exponential retry policy declared later in this file.
client CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
// Anthropic Claude 3.5 Sonnet, no retry policy.
client CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
// Anthropic Claude 3 Haiku, using the Constant retry policy declared later in this file.
client CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client built from the two smaller models above.
client CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Tries the cheaper OpenAI model first and falls back to gpt-4o if it fails.
// The strategy previously listed CustomGPT4oMini twice, so a failure of that
// client had nothing different to fall back to.
client OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4o]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policies referenced by the clients in this file via `retry_policy <Name>`.

// Up to 3 retries with a fixed 200 ms delay between attempts.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Up to 2 retries; delay starts at 300 ms, is multiplied by 1.5 each time,
// and is capped at 10000 ms.
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: packages/create-12-factor-agent/template/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
// "../" places the generated client in the project root, next to baml_src/.
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.88.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: packages/create-12-factor-agent/template/baml_src/tool_calculator.baml
================================================
// Calculator tool schemas. Each class has a literal `intent` that identifies
// the operation and two numeric operands, `a` and `b`.

// intent "add"
class AddTool {
intent "add"
a int | float
b int | float
}
// intent "subtract"
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// intent "multiply"
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// intent "divide"
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: packages/create-12-factor-agent/template/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"@boundaryml/baml": "latest",
"express": "^5.1.0",
"humanlayer": "^0.7.7",
"tsx": "^4.15.0",
"typescript": "^5.0.0",
"zod": "^3.25.64"
},
"devDependencies": {
"@types/express": "^5.0.1",
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0",
"supertest": "^7.1.0"
}
}
================================================
FILE: packages/create-12-factor-agent/template/src/a2h.ts
================================================
import { z, ZodSchema } from 'zod';
// Types for A2H API objects matching the new schemas

/** Common metadata type carried by every A2H object. */
export type Metadata = {
  uid: string;
};

/**
 * Spec of a message sent by the agent to the a2h server.
 * `T` is the Zod schema describing the structured response expected back.
 */
type MessageSpec<T extends ZodSchema<any>> = {
  agentMessage: string; // message from the agent
  response_schema?: T; // optional Zod schema for the response
  channel_id?: string; // optional channel id
};

/**
 * A message from the agent to a human, plus (once answered) the human's reply.
 * When a response schema `T` is supplied, `status.response` is typed as its
 * inferred shape.
 */
export type Message<T extends ZodSchema<any> = ZodSchema<any>> = {
  apiVersion: "proto.a2h.dev/v1alpha1";
  kind: "Message";
  metadata: Metadata;
  spec: MessageSpec<T>;
  status?: {
    humanMessage?: string; // message from the human
    response?: T extends ZodSchema<any> ? z.infer<T> : any; // optional, matches spec schema
  };
};

/** Response schema for approval requests: a decision plus an optional comment. */
export const ApprovalSchema = z.object({
  approved: z.boolean(),
  comment: z.string().optional(),
});

/** A message whose structured response follows `ApprovalSchema`. */
export type ApprovalRequest = Message<typeof ApprovalSchema>;
/** A message with no specific response schema. */
export type HumanRequest = Message;

/** Spec of a NewConversation sent by the a2h server to the agent. */
type NewConversationSpec = {
  user_message: string; // message from the human
  channel_id: string; // channel id to use for future conversations
  agent_name?: string; // optional agent name or identifier
  raw?: Record<string, any>; // optional raw data from the request, e.g. email metadata
};

export type NewConversation = {
  apiVersion: "proto.a2h.dev/v1alpha1";
  kind: "NewConversation";
  metadata: Metadata;
  spec: NewConversationSpec;
};

// Optionally, you can add union types for future extensibility
export type A2HEvent<T extends ZodSchema<any> = ZodSchema<any>> = Message<T> | NewConversation;
================================================
FILE: packages/create-12-factor-agent/template/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's history. */
export interface Event {
  /** Kind of entry, e.g. "user_input", "tool_call" or "tool_response". */
  type: string;
  /** Payload: a plain value, or an object (tool calls carry an `intent` field). */
  data: any;
}
/**
 * An ordered list of events, plus helpers for serializing it into the prompt
 * and for inspecting what the agent is currently waiting on.
 */
export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  /** Serializes every event and joins the results with newlines. */
  serializeForLLM() {
    return this.events.map(e => this.serializeOneEvent(e)).join("\n");
  }

  /** Strips leading spaces/tabs from every line of `s`. */
  trimLeadingWhitespace(s: string) {
    return s.replace(/^[ \t]+/gm, '');
  }

  /**
   * Renders one event as an XML-style element. The tag is the event's
   * `data.intent` when present, otherwise its `type`. Object payloads are
   * rendered as `key: value` lines (without `intent`); anything else is
   * rendered as-is.
   */
  serializeOneEvent(e: Event) {
    const tag = e.data?.intent || e.type;
    // typeof null === 'object', so guard it explicitly before Object.keys.
    const isRecord = e.data !== null && typeof e.data === 'object';
    const body = isRecord
      ? Object.keys(e.data)
          .filter(k => k !== 'intent')
          .map(k => `${k}: ${e.data[k]}`)
          .join("\n")
      : e.data;
    // The closing tag needs its "</" so the element is well-formed.
    return this.trimLeadingWhitespace(`\n<${tag}>\n${body}\n</${tag}>\n`);
  }

  /** True when the last event is a message that waits on a human reply. */
  awaitingHumanResponse(): boolean {
    // Optional chaining: an empty thread or a non-object payload is simply "not waiting".
    const intent = this.lastEvent()?.data?.intent;
    return ['request_more_information', 'done_for_now'].includes(intent);
  }

  /** True when the last event is a `divide` call, which requires approval. */
  awaitingHumanApproval(): boolean {
    return this.lastEvent()?.data?.intent === 'divide';
  }

  /** Returns the most recent event (undefined for an empty thread). */
  lastEvent(): Event {
    return this.events[this.events.length - 1];
  }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes a calculator tool call and appends its result to the thread as a
 * `tool_response` event.
 *
 * @param nextStep the calculator tool call to run (selected by `intent`)
 * @param thread   the thread to append the result to (mutated in place)
 * @returns the same thread, with the new event appended
 * @throws Error when `nextStep.intent` is not a known calculator operation
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Callers pass untyped event data here, so fail loudly instead of
      // resolving to undefined and crashing later in the loop.
      throw new Error(`unknown calculator tool: ${(nextStep as { intent?: unknown }).intent}`);
  }
  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Inner agent loop: repeatedly asks the model for the next step, records it on
 * the thread as a `tool_call`, and executes it when it is safe to do so.
 *
 * Returns the thread (mutated in place) as soon as the next step needs a
 * human: a message to the user, an approval request, a `divide` call, or any
 * step this loop has no handler for.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
      case "request_more_information":
      case "request_approval_from_manager":
        // response to human, return the thread
        return thread;
      case "divide":
        // divide is scary, return it for human approval
        return thread;
      case "add":
      case "subtract":
      case "multiply":
        thread = await handleNextStep(nextStep, thread);
        break;
      default:
        // Steps with no handler here (e.g. process_refund) previously fell
        // through and re-prompted the model forever without a tool response.
        // Hand them to the outer loop instead.
        return thread;
    }
  }
}
================================================
FILE: packages/create-12-factor-agent/template/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event, handleNextStep } from "../src/agent";
import { FileSystemThreadStore } from "./state";
import chalk from "chalk";
const threadStore = new FileSystemThreadStore();

/**
 * Outer loop for the CLI: seeds a thread with the user's message, then
 * alternates between running the agent loop and collecting a human reply on
 * the terminal. The thread is persisted after each half-step.
 *
 * Runs until the process is interrupted (ctrl+c). Optionally you could exit
 * on "done_for_now" and print the final result instead.
 */
export async function cliOuterLoop(message: string) {
  // The user's message is the first event of a fresh thread.
  const thread = new Thread([{ type: "user_input", data: message }]);
  const threadId = await threadStore.create(thread);

  for (;;) {
    const current = await agentLoop(thread);
    await threadStore.update(threadId, current);

    // Everything is answered on the CLI. To route
    // "request_approval_from_manager" elsewhere, branch on
    // current.lastEvent().data.intent here (see the commented-out askManager).
    const reply = await askHumanCLI(current.lastEvent());
    current.events.push(reply);

    await threadStore.update(threadId, current);
  }
}
/**
 * CLI entry point. Joins all command-line arguments (after the node binary
 * and script name) into one message, defaulting to "hello!" when none are
 * given, and starts the outer loop with it.
 */
export async function cli() {
  const [, , ...words] = process.argv;
  let message = "hello!";
  if (words.length !== 0) {
    message = words.join(" ");
  }
  await cliOuterLoop(message);
}
// async function askManager(lastEvent: Event): Promise {
// const hl = humanlayer({
// contactChannel: {
// email: {
// address: process.env.HUMANLAYER_EMAIL_ADDRESS || "manager@example.com"
// }
// }
// })
// const resp = await hl.fetchHumanResponse({
// spec: {
// msg: lastEvent.data.message
// }
// })
// return {
// type: "manager_response",
// data: resp
// }
// }
/**
 * Resolves the agent's last step with a human on the terminal and returns the
 * event to append to the thread.
 *
 * - "divide": asks for approval; runs the tool when approved, otherwise
 *   returns a denial `tool_response` carrying the user's feedback.
 * - message intents: prints the message and returns the typed reply.
 *
 * @throws Error for any other intent
 */
async function askHumanCLI(lastEvent: Event): Promise<Event> {
  switch (lastEvent.data.intent) {
    // Each case gets its own block so its `const` declarations are not
    // shared across the whole switch scope.
    case "divide": {
      const response = await approveCLI(`agent wants to run ${chalk.green(JSON.stringify(lastEvent.data))}\nPress Enter to approve, or type feedback to cancel:`);
      if (response.approved) {
        // Run the tool against a scratch thread and return only its result event.
        const thread = new Thread([lastEvent]);
        const result = await handleNextStep(lastEvent.data, thread);
        return result.events[result.events.length - 1];
      } else {
        return {
          type: "tool_response",
          data: `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}`
        };
      }
    }
    case "request_more_information":
    case "request_approval_from_manager":
    case "done_for_now": {
      const message = await messageCLI(lastEvent.data.message);
      return {
        type: "tool_response",
        data: message
      };
    }
    default:
      throw new Error(`unknown tool in outer loop: ${lastEvent.data.intent}`)
  }
}
/**
 * Outcome of an approval prompt: either approved, or rejected with the
 * reviewer's feedback in `comment`.
 */
type Approval =
  | { approved: true }
  | { approved: false; comment: string };
/**
 * Prints `message` followed by a "> " prompt on stdout and resolves with the
 * line the user types on stdin.
 */
async function messageCLI(message: string): Promise<string> {
  const readline = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise((resolve) => {
    readline.question(`${message}\n> `, (answer: string) => {
      // Close the interface so stdin is released between prompts.
      readline.close();
      resolve(answer);
    });
  });
}
/**
 * Prints `message` followed by a "> " prompt and reads one line from stdin.
 * An empty (or whitespace-only) answer means approval; anything else is a
 * rejection, with the raw answer returned as the comment.
 */
async function approveCLI(message: string): Promise<Approval> {
  const readline = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise((resolve) => {
    readline.question(`${message}\n> `, (answer: string) => {
      readline.close();
      // If the answer is empty (just pressed enter), treat it as approval
      if (answer.trim() === '') {
        resolve({ approved: true });
      } else {
        // Any non-empty response is treated as rejection with feedback
        resolve({ approved: false, comment: answer });
      }
    });
  });
}
// Run the CLI only when this file is executed directly (not when imported).
if (require.main === module) {
  // Don't leave the promise floating: report the failure and exit non-zero.
  cli().catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });
}
================================================
FILE: packages/create-12-factor-agent/template/src/index.ts
================================================
import { cli } from "./cli"
/** Program entry point: runs the CLI and logs any error it rejects with. */
const main = async (): Promise<void> => {
  await cli();
};

main().catch((err) => console.error(err));
================================================
FILE: packages/create-12-factor-agent/template/src/server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop as innerLoop, handleNextStep } from '../src/agent';
import { FileSystemThreadStore, ThreadStore } from '../src/state';
import { ContactChannel, FunctionCall, HumanContact, humanlayer, V1Beta2EmailEventReceived, V1Beta2HumanContactCompleted, V1Beta2SlackEventReceived } from '@humanlayer/sdk';
// Express application; request bodies are parsed as JSON.
const app = express();
app.use(express.json());
// Express setting: indent JSON responses with 2 spaces.
app.set('json spaces', 2);
// Thread persistence used by the request handler.
const store = new FileSystemThreadStore();
/** Webhook payload announcing a new conversation started by a human. */
type V1Beta3ConversationCreated = {
  is_test: boolean;
  type: "conversation.created";
  event: {
    user_message: string;
    contact_channel_id: number;
    agent_name: string;
  }
}

/** A human contact whose `status.response` is present. */
type CompletedHumanContact = HumanContact & {
  status: {
    response: string;
  }
}

/** Webhook payload delivered when a human contact has been answered. */
type V1Beta3HumanContactCompleted = {
  is_test: boolean;
  type: "human_contact.completed";
  event: {
    contact_channel_id: number;
  } & CompletedHumanContact
}

type Approved = {status: {approved: true}}
type Rejected = {status: {approved: false; comment: string}}

/** A function call that has been approved, or rejected with a comment. */
type CompletedFunctionCall = FunctionCall & (Approved | Rejected)

/** Webhook payload delivered when a function call has been approved/rejected. */
type V1Beta3FunctionCallCompleted = {
  is_test: boolean;
  type: "function_call.completed";
  event: {
    contact_channel_id: number;
  } & CompletedFunctionCall
}

/** Every webhook payload the conversations endpoint accepts, discriminated by `type`. */
type V1Beta3Event = V1Beta3ConversationCreated | V1Beta3HumanContactCompleted | V1Beta3FunctionCallCompleted;
/** Sends the standard 404 JSON body used when a thread cannot be located. */
const notFound = (res: Response) => {
  const statusCode = 404;
  const payload = {
    error: 'Not Found',
    message: `Thread not found`,
    status: statusCode
  };
  res.status(statusCode).json(payload);
}
/**
 * Webhook handler: turns an incoming event into thread state, runs the inner
 * agent loop, then hands the result back to a human via HumanLayer.
 *
 * - "conversation.created" starts a new thread from the user's message.
 * - "human_contact.completed" / "function_call.completed" resume the thread
 *   whose id was stored in the request's `spec.state.thread_id`.
 *
 * Responds 404 when the thread cannot be found, 400 for an unrecognized event
 * type, and `{ status: "ok" }` otherwise.
 */
const outerLoop = async (req: Request, res: Response) => {
  console.log("outerLoop", req.body);
  const body = req.body as V1Beta3Event;

  const hl = humanlayer({
    runId: process.env.HUMANLAYER_RUN_ID || `12fa-agent`,
    contactChannel: {
      channel_id: body.event.contact_channel_id,
    } as ContactChannel // todo export this type flavor
  });

  /* get the thread or make a new one */
  let thread: Thread | undefined;
  let threadId: string | undefined;

  switch (body.type) {
    case "conversation.created":
      thread = new Thread([{type: "conversation.created", data: body.event.user_message}]);
      break;
    case "human_contact.completed":
    case "function_call.completed":
      threadId = body.event.spec.state?.thread_id;
      if (!threadId) {
        notFound(res);
        return;
      }
      // The store is async: without `await`, `thread` would be a Promise and
      // the not-found check below could never trigger.
      thread = await store.get(threadId);
      if (!thread) {
        notFound(res);
        return;
      }
      break;
  }

  if (!thread) {
    // Unrecognized event type: nothing to resume and nothing to start.
    res.status(400).json({
      error: 'Bad Request',
      message: `Unsupported event type`,
      status: 400
    });
    return;
  }

  /* handle the response event */
  if (body.type === "function_call.completed" && body.event.status?.approved) {
    // run the function call and add the result to the thread
    thread = await handleNextStep(thread.lastEvent().data, thread);
  } else if (body.type === "function_call.completed" && !body.event.status?.approved) {
    // add the denial to the thread
    thread.events.push({
      type: "human_response",
      data: `user denied operation ${thread.lastEvent().data.intent} with feedback: ${body.event.status?.comment}`
    });
  } else if (body.type === "human_contact.completed") {
    // add the human response to the thread
    thread.events.push({
      type: "human_response",
      data: {
        msg: body.event.status.response,
      }
    });
  }

  /* run the inner loop */
  const newThread = await innerLoop(thread);

  // Persist before contacting the human so the thread id sent in `state`
  // is a real string, not a pending Promise.
  if (threadId) {
    await store.update(threadId, newThread);
  } else {
    threadId = await store.create(newThread);
  }

  // we exited the inner loop, send to human
  const lastEvent = newThread.lastEvent();
  switch (lastEvent.data.intent) {
    case "request_more_information":
    case "done_for_now": {
      // NOTE(review): awaited on the assumption that the SDK call returns a
      // Promise, so failures surface here - confirm against the SDK.
      await hl.createHumanContact({
        spec: {
          msg: lastEvent.data.message,
          state: {
            thread_id: threadId
          }
        }
      });
      console.log(`created human contact "${lastEvent.data.message}"`);
      break;
    }
    case "other_scary_tools": // example, add more tools here
    case "divide": {
      const intent = lastEvent.data.intent;
      // remove intent from kwargs payload
      const { intent: _, ...kwargs } = lastEvent.data;
      await hl.createFunctionCall({
        spec: {
          fn: intent,
          kwargs: kwargs,
          state: {
            thread_id: threadId
          }
        }
      });
      console.log("created function call", {intent, kwargs});
      break;
    }
  }

  res.json({ status: "ok" });
}
/**
 * Wires up the HTTP routes and starts listening.
 *
 * Registers the conversations webhook, a JSON 404 fallback for every other
 * route, and binds to `process.env.PORT` (falling back to 8000).
 *
 * @returns the server object returned by `app.listen`
 */
export const startServer = () => {
  app.post('/api/v1/conversations', outerLoop);

  // Fallback: anything that did not match a route above gets a JSON 404.
  app.use((req: Request, res: Response) => {
    const notFoundBody = {
      error: 'Not Found',
      message: `Route ${req.originalUrl} not found`,
      status: 404
    };
    res.status(404).json(notFoundBody);
  });

  const port = process.env.PORT || 8000;
  const announce = () => {
    console.log(`Server is running on port ${port}`);
  };
  const server = app.listen(port, announce);

  // Surface listen/runtime errors on the console.
  server.on('error', (error: Error) => {
    console.error('Server error:', error);
  });

  return server;
}
// Boot the server only when this file is the process entry point,
// not when it is loaded by another module.
const isEntryPoint = require.main === module;
if (isEntryPoint) {
  startServer();
}
================================================
FILE: packages/create-12-factor-agent/template/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
import { Response } from 'express';
import fs from 'fs/promises';
import path from 'path';
/**
 * Persistence contract for agent threads.
 *
 * All operations are asynchronous; callers must `await` the results.
 */
export interface ThreadStore {
  /** Persists a new thread and resolves to the identifier assigned to it. */
  create(thread: Thread): Promise<string>;
  /** Resolves to the stored thread, or `undefined` when none exists for `id`. */
  get(id: string): Promise<Thread | undefined>;
  /** Overwrites the stored state for `id` with `thread`. */
  update(id: string, thread: Thread): Promise<void>;
}
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc

/**
 * ThreadStore that keeps each thread as two files under `<cwd>/.threads`:
 * `<id>.json` (the JSON-serialized thread) and `<id>.txt` (the output of
 * `thread.serializeForLLM()`).
 */
export class FileSystemThreadStore implements ThreadStore {
  private threadsDir: string;

  constructor() {
    this.threadsDir = path.join(process.cwd(), '.threads');
  }

  /**
   * Writes a new thread to disk under a freshly generated UUID.
   *
   * @returns the generated thread id
   */
  async create(thread: Thread): Promise<string> {
    await fs.mkdir(this.threadsDir, { recursive: true });
    const id = crypto.randomUUID();
    await this.writeThreadFiles(id, thread);
    return id;
  }

  /**
   * Loads a thread by id.
   *
   * @returns the thread, or `undefined` when the id is malformed or the
   *          JSON file cannot be read
   */
  async get(id: string): Promise<Thread | undefined> {
    // Ids may originate from request payloads; never let one escape threadsDir.
    if (!FileSystemThreadStore.isSafeId(id)) return undefined;
    const data = await fs.readFile(this.jsonPath(id), 'utf8').catch(() => null);
    if (!data) return undefined;
    return new Thread(JSON.parse(data).events);
  }

  /**
   * Overwrites both files of an existing thread.
   *
   * @throws Error when `id` is not a plain identifier (see `isSafeId`)
   */
  async update(id: string, thread: Thread): Promise<void> {
    if (!FileSystemThreadStore.isSafeId(id)) {
      throw new Error(`Invalid thread id: ${id}`);
    }
    await this.writeThreadFiles(id, thread);
  }

  /** True when `id` contains only characters that cannot form a path segment escape. */
  private static isSafeId(id: string): boolean {
    return typeof id === 'string' && /^[A-Za-z0-9_-]+$/.test(id);
  }

  /** Absolute path of the JSON file for `id`. */
  private jsonPath(id: string): string {
    return path.join(this.threadsDir, `${id}.json`);
  }

  /** Writes the `.json` and `.txt` representations of `thread` in parallel. */
  private async writeThreadFiles(id: string, thread: Thread): Promise<void> {
    const txtPath = path.join(this.threadsDir, `${id}.txt`);
    await Promise.all([
      fs.writeFile(this.jsonPath(id), JSON.stringify(thread, null, 2)),
      fs.writeFile(txtPath, thread.serializeForLLM())
    ]);
  }
}
================================================
FILE: packages/create-12-factor-agent/template/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: packages/walkthroughgen/.gitignore
================================================
.tmptest*
================================================
FILE: packages/walkthroughgen/examples/typescript/.gitignore
================================================
build/
================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/00-package-lock.json
================================================
{
"name": "walkthroughgen",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "walkthroughgen",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"typescript": "^5.8.3"
}
}
},
"node_modules/typescript": {
"version": "5.8.3",
"resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz",
"integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==",
"license": "Apache-2.0",
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
},
"engines": {
"node": ">=14.17"
}
}
}
================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/00-package.json
================================================
{
"name": "walkthroughgen",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "",
"dependencies": {
"typescript": "^5.8.3"
}
}
================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/00-tsconfig.json
================================================
{
"compilerOptions": {
"target": "es2016",
"module": "commonjs",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": true,
"skipLibCheck": true
},
"exclude": ["node_modules", "dist", "**/*walkthrough/**"]
}
================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/01-index.ts
================================================
// Entry point for the first walkthrough step: print a greeting.
function main(): void {
  console.log("hello world");
}

main();
================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/02-cli.ts
================================================
// Minimal command dispatcher: reads `<command> <name>` from the process
// arguments and handles the single supported command, `create`.
const cli = () => {
  const [command, name] = process.argv.slice(2);

  // Guard clause: anything other than `create` prints usage help.
  if (command !== "create") {
    console.log("Invalid command: ", command);
    console.log("available commands: create");
    return;
  }

  console.log(`Creating ${name}`);
};

cli();
================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough/02-index.ts
================================================
// Async entry point for the final walkthrough step: delegates to `cli`.
// NOTE(review): `cli` is not imported in this file. It is declared as a
// top-level `const` in 02-cli.ts (copied to src/cli.ts by the walkthrough) and
// is not exported there - confirm it is actually in scope at runtime.
const main = async () => {
return cli();
};
// Log a rejected promise from main instead of leaving it unhandled.
main().catch(console.error);
================================================
FILE: packages/walkthroughgen/examples/typescript/walkthrough.yaml
================================================
title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"
targets:
- markdown: "./build/walkthrough.md" # generates a walkthrough.md file
onChange: # default behavior - on changes, show diffs and cp commands
diff: true
cp: true
newFiles: # when new files are created, just show the copy command
cat: false
cp: true
- final: "./build/final" # outputs the final project to the final folder
- folders: "./build/by-section" # creates a separate working folder for each section
sections:
- name: setup
title: "Copy initial files"
steps:
- file: {src: ./walkthrough/00-package.json, dest: package.json}
- file: {src: ./walkthrough/00-package-lock.json, dest: package-lock.json}
- file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json}
- name: initialize
title: "Initialize the project"
steps:
- text: "initialize the project"
command: |
npm install
- text: "then add index.ts"
file: {src: ./walkthrough/01-index.ts, dest: src/index.ts}
- text: "run it with tsx"
command: |
npx tsx src/index.ts
results:
- text: "you should see a hello world message"
code: |
hello world
- name: add-cli
title: "Add a CLI"
steps:
- text: "add a cli"
file: {src: ./walkthrough/02-cli.ts, dest: src/cli.ts}
- text: "update index.ts to use the cli"
file: {src: ./walkthrough/02-index.ts, dest: src/index.ts}
================================================
FILE: packages/walkthroughgen/examples/walkthroughgen/walkthrough.yaml
================================================
title: "using walkthroughgen"
targets:
- markdown: "./walkthrough.md" # generates a walkthrough.md file
diffs: true
- final: "./final" # outputs the final project to the final folder
- folders: "./by-section" # creates a separate working folder for each section
init:
- file: {src: ./walkthrough/00-package.json, dest: package.json}
- file: {src: ./walkthrough/00-package-lock.json, dest: package-lock.json}
sections:
- name: initialize
title: "initialize the project"
steps:
- text: "initialize walkthroughgen"
command: |
npx wtg init my-project
cd my-project
- text: "this will create an empty project with a walkthrough.yaml file"
command: |
ls -la
cat walkthrough.yaml
results:
- text: "you should see a walkthrough.yaml file"
code: |
# walkthrough.yaml
title: "hello world"
sections:
- name: initialize
title: "initialize the project"
steps:
- text: "initialize the project"
command: |
# your code here
- name: build
title: "build the project"
steps:
- text: "build the project"
command: |
npx wtg build
- text: "this will create a walkthrough.md file"
command: |
cat walkthrough.md
results:
================================================
FILE: packages/walkthroughgen/jest.config.js
================================================
// Jest configuration for walkthroughgen.
module.exports = {
// Use the ts-jest preset and run tests in a Node environment.
preset: 'ts-jest',
testEnvironment: 'node',
// Any .ts file under a test/ directory is treated as a test file...
testMatch: ['**/test/**/*.ts'],
// ...except files under node_modules and test/utils.
testPathIgnorePatterns: ['/node_modules/', '/test/utils/'],
// Route .ts files through ts-jest.
transform: {
'^.+\\.ts$': 'ts-jest',
},
};
================================================
FILE: packages/walkthroughgen/package.json
================================================
{
"name": "walkthroughgen",
"version": "1.0.0",
"main": "index.js",
"scripts": {
"test": "jest",
"test:watch": "jest --watch"
},
"keywords": [],
"author": "",
"license": "ISC",
"description": "",
"dependencies": {
"@boundaryml/baml": "^0.85.0",
"@types/diff": "^7.0.2",
"@types/js-yaml": "^4.0.9",
"diff": "^7.0.0",
"js-yaml": "^4.1.0",
"typescript": "^5.8.3"
},
"devDependencies": {
"@types/jest": "^29.5.14",
"jest": "^29.7.0",
"ts-jest": "^29.3.2"
}
}
================================================
FILE: packages/walkthroughgen/prompt.md
================================================
Walkthroughgen is a tool for creating walkthroughs, tutorials, readmes, and documentation.
## Usage
You create a walkthrough by writing a simple yaml file that describes the walkthrough. In the file, you reference the incremental files that should exist at each step of the walkthrough
```
├── walkthrough
│ ├── 00-package-lock.json
│ ├── 00-package.json
│ ├── 01-index.ts
│ ├── 02-cli.ts
│ └── 02-index.ts
└── walkthrough.yaml
```
Your walkthrough.yaml file might look like this (runnable example in [examples/typescript](./examples/typescript))
```yaml
title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"
targets:
- markdown: "./build/walkthrough.md" # generates a walkthrough.md file
onChange: # default behavior - on changes, show diffs and cp commands
diff: true
cp: true
newFiles: # when new files are created, just show the copy command
cat: false
cp: true
- final: "./build/final" # outputs the final project to the final folder
- folders: "./build/by-section" # creates a separate working folder for each section
sections:
- name: setup
title: "Copy initial files"
steps:
- file: {src: ./walkthrough/00-package.json, dest: package.json}
- file: {src: ./walkthrough/00-package-lock.json, dest: package-lock.json}
- file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json}
- name: initialize
title: "Initialize the project"
steps:
- text: "initialize the project"
command: |
npm install
- text: "then add index.ts"
file: {src: ./walkthrough/01-index.ts, dest: src/index.ts}
- text: "run it with tsx"
command: |
npx tsx src/index.ts
results:
- text: "you should see a hello world message"
code: |
hello world
- name: add-cli
title: "Add a CLI"
steps:
- text: "add a cli"
file: {src: ./walkthrough/02-cli.ts, dest: src/cli.ts}
- text: "update index.ts to use the cli"
file: {src: ./walkthrough/02-index.ts, dest: src/index.ts}
```
Build the project with:
```
npm i -g wtg
wtg build
```
based on your targets, this would create the following files
```
├── walkthrough
│ ├── 00-package-lock.json
│ ├── 00-package.json
│ ├── 01-index.ts
│ ├── 02-cli.ts
│ └── 02-index.ts
├── build
│ ├── by-section
│ │ ├── 00-initialize # only contains the files in `init`
│ │ │ ├── readme.md # contains steps for this section
│ │ │ ├── package.json
│ │ │ ├── package-lock.json
│ │ │ └── tsconfig.json
│ │ └── 01-add-cli # contains the files up to the START of section 1
│ │ ├── readme.md # contains steps for this section
│ │ ├── package.json
│ │ ├── package-lock.json
│ │ ├── tsconfig.json
│ │ └── src
│ │ └── index.ts
│ ├── final
│ │ ├── package.json
│ │ ├── package-lock.json
│ │ ├── tsconfig.json
│ │ └── src
│ │ ├── cli.ts
│ │ └── index.ts
│ └── walkthrough.md
```
and your walkthrough.md file will look like:
```markdown
# Setting up a typescript cli
this is a walkthrough for setting up a typescript cli
## Copy initial files
cp walkthrough/00-package.json package.json
cp walkthrough/00-package-lock.json package-lock.json
cp walkthrough/00-tsconfig.json tsconfig.json
## Initialize the project
initialize the project
npm install
then add index.ts
cp walkthrough/01-index.ts src/index.ts
and run it with tsx
npx tsx src/index.ts
you should see a hello world message
hello world
## Add a CLI
add a cli
```
```
cp walkthrough/02-cli.ts src/cli.ts
update index.ts to use the cli
```diff
const main = async () => {
+ return cli();
};
main();
```
or just:
cp walkthrough/02-index.ts src/index.ts
```
## Features
### Targets
- `markdown`: generates a single markdown file at the given path
- `folders`: creates a set of folders, one for each section
- `final`: outputs the final project to the given folder
### Init
### Sections
### Steps
#### Step
## Walkthrough.yaml for walkthroughgen
## Implementation Plan
- [ ] implement core walkthroughgen CLI - `wtg build` # defaults to walkthrough.yaml in current directory
- Scope 1: generating walkthrough.md
- [ ] create end-to-end test for a simple walkthrough file, just a single yaml file with no sections
- [ ] create end-to-end test for a walkthrough file with a single section
- [ ] test generation of diffs and cp commands
- Scope 2: generating final/ project build
- [ ] create end-to-end test for a walkthrough file with a final target
- Scope 3: generating by-section project builds with readmes
- [ ] create end-to-end test for a walkthrough file with a by-section target
================================================
FILE: packages/walkthroughgen/readme.md
================================================
# Walkthroughgen
Walkthroughgen is a tool for creating walkthroughs, tutorials, readmes, and documentation. It helps you maintain step-by-step guides by generating markdown and working directories from a simple YAML configuration.
## Features
- 📝 **Markdown Generation**: Create beautiful markdown files with diffs, code blocks, and collapsible sections
- 📁 **Working Directories**: Generate separate directories for each section of your walkthrough
- 🔄 **Incremental Changes**: Track and display changes between steps
- 🎯 **Multiple Targets**: Output to markdown, section folders, and final project state
- 📦 **File Management**: Copy files, create directories, and run commands
- 🔍 **Rich Diffs**: Show meaningful diffs between file versions
- 📚 **Section READMEs**: Generate per-section documentation
## Installation
```bash
npm install -g walkthroughgen
```
## Quick Start
1. Create a `walkthrough.yaml` file:
```yaml
title: "My Tutorial"
text: "A step-by-step guide"
targets:
- markdown: "./walkthrough.md"
onChange:
diff: true
cp: true
- folders:
path: "./by-section"
final:
dirName: "final"
sections:
- name: setup
title: "Initial Setup"
steps:
- file: {src: ./files/package.json, dest: package.json}
- command: "npm install"
```
2. Run the generator:
```bash
walkthroughgen generate walkthrough.yaml
```
## Directory Structure
A typical walkthrough project looks like this:
```
my-tutorial/
├── walkthrough/ # Source files for each step
│ ├── 00-package.json
│ ├── 01-index.ts
│ └── 02-config.ts
├── walkthrough.yaml # Walkthrough configuration
└── build/ # Generated output
├── by-section/ # Section-by-section working directories
│ ├── 00-setup/
│ └── 01-config/
├── final/ # Final project state
└── walkthrough.md # Generated markdown
```
## Walkthrough.yaml Configuration
### Top-Level Fields
- `title`: Title of the walkthrough
- `text`: Introduction text
- `targets`: Output configuration
- `sections`: Tutorial sections
### Targets
#### Markdown Target
```yaml
targets:
- markdown: "./output.md"
onChange:
diff: true # Show diffs for changed files
cp: true # Show cp commands
newFiles:
cat: false # Don't show file contents
cp: true # Show cp commands
```
#### Folders Target
```yaml
targets:
- folders:
path: "./by-section" # Base path for section folders
skip: ["cleanup"] # Sections to skip
final:
dirName: "final" # Name for final state directory
```
### Sections
Each section represents a logical step in your tutorial:
```yaml
sections:
- name: setup # Used for folder naming and skip array
title: "Initial Setup" # Display title
text: "Setup steps..." # Section description
steps:
# ... steps ...
```
### Steps
Steps define the actions to take:
#### File Copy
```yaml
steps:
- text: "Copy package.json"
file:
src: ./files/package.json
dest: package.json
```
#### Directory Creation
```yaml
steps:
- text: "Create src directory"
dir:
create: true
path: src
```
#### Command Execution
```yaml
steps:
- text: "Install dependencies"
command: "npm install"
incremental: true # run when building up folders target
```
#### Command Results
```yaml
steps:
- command: "npm run test"
results:
- text: "You should see:"
code: |
All tests passed!
```
## Generated Output
### Markdown Features
- **File Diffs**: Shows changes between versions
- **Copy Commands**: Easy-to-follow file copy instructions
- **Collapsible Sections**: Hide/show file contents
- **Code Highlighting**: Syntax highlighting for various languages
Example markdown output:
~~~markdown
# Initial Setup
Copy the package.json:
cp ./files/package.json package.json
show file
```json
{
"name": "my-project",
"version": "1.0.0"
}
```
Install dependencies:
npm install
You should see:
added 123 packages
~~~
### Section Folders
The `folders` target creates:
1. A directory for each section
2. Section-specific README.md files
3. Working project state
4. Optional final state directory
## Examples
See the [examples](./examples) directory for complete examples:
- [TypeScript CLI](./examples/typescript): Basic TypeScript project setup
- [Walkthroughgen](./examples/walkthroughgen): Self-documenting example
## Tips
1. Use meaningful section names - they become folder names
2. Include context in step text
3. Use `incremental: true` for commands that modify state
4. Leverage diffs to highlight important changes
5. Use the `skip` array to exclude setup/cleanup sections from output
## Contributing
Contributions welcome! Please read [CONTRIBUTING.md](./CONTRIBUTING.md) for details.
## License
This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details.
================================================
FILE: packages/walkthroughgen/src/cli.ts
================================================
import * as fs from 'fs';
import * as path from 'path';
import * as yaml from 'js-yaml';
import * as Diff from 'diff';
import { execSync } from 'child_process';
/** A single action inside a section; all fields are optional and may be combined. */
interface SectionStep {
  /** Prose emitted before the step's other output. */
  text?: string;
  /** File to copy: `src` is resolved against the project root, `dest` against the working tree. */
  file?: { src: string; dest: string };
  /** Shell command shown in the generated markdown. */
  command?: string;
  /** When true, `command` is also executed while building the folders target. */
  incremental?: boolean;
  /** Directory-creation step; `path` is relative to the working tree. */
  dir?: { create: boolean; path: string };
  /** Expected output shown after the step. */
  results?: { text: string; code: string }[];
}

/** One section of a walkthrough. */
interface Section {
  /** Display title; also the fallback source for the folder name. */
  title: string;
  /** Optional introduction shown under the title. */
  text?: string;
  /** Optional short name used for folder naming. */
  name?: string;
  /** Ordered steps of the section. */
  steps?: SectionStep[];
}
/** Settings of the `folders` target. */
interface FoldersTargetOptions {
  /** Path for section folders, e.g. "./build/by-section". */
  path: string;
  /** Section names for which no folder is created. */
  skip?: string[];
  /** When present, a directory with this name receives the result of all steps. */
  final?: {
    dirName: string;
  };
}

/** One entry of the `targets` list in walkthrough.yaml. */
interface WalkthroughTarget {
  /** Output path of the generated markdown file. */
  markdown?: string;
  folders?: FoldersTargetOptions;
  /** Rendering options for steps that overwrite an existing file. */
  onChange?: { diff?: boolean; cp?: boolean };
  /** Rendering options for steps that introduce a new file. */
  newFiles?: { cat?: boolean; cp?: boolean };
}

/** Shape of a parsed walkthrough.yaml document. */
interface WalkthroughData {
  title: string;
  text: string;
  sections?: Section[];
  targets?: WalkthroughTarget[];
}
/**
 * Returns the base name used for a section's folder.
 *
 * Prefers `section.name`; when that is missing or empty, derives a slug from
 * the title by lower-casing it and collapsing every run of characters outside
 * `a-z0-9` into a single dash.
 */
function getSectionBaseName(section: Section): string {
  if (section.name) {
    return section.name;
  }
  const loweredTitle = section.title.toLowerCase();
  return loweredTitle.replace(/[^a-z0-9]+/g, '-');
}
/**
 * Copies a walkthrough source file into a section folder, preserving its
 * location relative to the project root.
 *
 * @param srcFile     source path, resolved against `projectRoot`
 * @param projectRoot directory the source path is relative to
 * @param sectionDir  folder that receives the copy
 */
function copySourceFiles(srcFile: string, projectRoot: string, sectionDir: string): void {
  const absoluteSource = path.resolve(projectRoot, srcFile);
  const target = path.join(sectionDir, path.relative(projectRoot, absoluteSource));
  const targetParent = path.dirname(target);
  fs.mkdirSync(targetParent, { recursive: true });
  fs.copyFileSync(absoluteSource, target);
}
/**
 * Copies one file to another location inside the same section directory,
 * creating the destination's parent directories as needed.
 *
 * @param srcFile    source path relative to `sectionDir`
 * @param destFile   destination path relative to `sectionDir`
 * @param sectionDir directory both paths are relative to
 */
function copyWorkingFile(srcFile: string, destFile: string, sectionDir: string): void {
  const [from, to] = [srcFile, destFile].map((relative) => path.join(sectionDir, relative));
  fs.mkdirSync(path.dirname(to), { recursive: true });
  fs.copyFileSync(from, to);
}
/**
 * Recursively copies the contents of `src` into `dest`.
 *
 * Does nothing when `src` does not exist. `dest` (and any missing parents) is
 * created; sub-directories are descended into and every other entry is copied
 * with `fs.copyFileSync`.
 */
function copyDirectory(src: string, dest: string): void {
  const sourceMissing = !fs.existsSync(src);
  if (sourceMissing) {
    return;
  }

  fs.mkdirSync(dest, { recursive: true });

  fs.readdirSync(src, { withFileTypes: true }).forEach((child) => {
    const from = path.join(src, child.name);
    const to = path.join(dest, child.name);
    if (!child.isDirectory()) {
      fs.copyFileSync(from, to);
      return;
    }
    copyDirectory(from, to);
  });
}
/**
 * Applies a section's steps to the working directory that accumulates the
 * project state across sections.
 *
 * For each step, in order: creates the requested directory, copies the
 * requested file, and runs the command when `incremental` is exactly `true`.
 * File-copy and command failures are logged and the remaining steps still
 * run, so one bad step does not abort the whole build.
 *
 * @param steps       steps to apply; `undefined` is a no-op
 * @param projectRoot directory that `file.src` paths are resolved against
 * @param workingDir  directory that receives created dirs and copied files
 * @param sectionPath when provided, each copied source file is also mirrored
 *                    into this section folder via `copySourceFiles`
 */
function applyStepsToWorkingDir(
  steps: Section['steps'],
  projectRoot: string,
  workingDir: string,
  sectionPath: string | null = null // If provided, also copy source files to section's walkthrough/
): void {
  if (!steps) return;

  for (const step of steps) {
    // Handle dir creation
    if (step.dir?.create) {
      const dirToCreate = path.join(workingDir, step.dir.path);
      fs.mkdirSync(dirToCreate, { recursive: true });
    }

    // Handle file copy
    if (step.file?.src) {
      const srcAbsPath = path.resolve(projectRoot, step.file.src);
      const destPath = path.join(workingDir, step.file.dest);
      try {
        // Copy to working directory
        fs.mkdirSync(path.dirname(destPath), { recursive: true });
        fs.copyFileSync(srcAbsPath, destPath);
        // If a section path is provided, also copy source file to section's walkthrough/
        if (sectionPath) {
          copySourceFiles(step.file.src, projectRoot, sectionPath);
        }
      } catch (error) {
        // Log and continue, the same way command failures below are handled.
        console.error(`Error copying file "${step.file.src}" to ${destPath}:`, error);
      }
    }

    // Handle command execution - only run if incremental is explicitly true
    if (step.command && step.incremental === true) {
      try {
        execSync(step.command, { cwd: workingDir, stdio: 'inherit' });
      } catch (error) {
        // Log error but continue with the remaining steps.
        console.error(`Error executing incremental command "${step.command}" in ${workingDir}:`, error);
      }
    }
  }
}
/**
 * Renders a plain markdown summary of one section: title, optional intro
 * text, then for every step its text, `mkdir -p` / `cp` lines, command and
 * expected results.
 *
 * Multi-line commands and result code get the indent prefix on every line,
 * so the whole snippet stays inside one indented block.
 *
 * @param section section to render
 * @returns the markdown text, starting with a `# <title>` heading
 */
function generateSectionMarkdown(section: Section): string {
  // Trims the snippet and prefixes each of its lines with the indent.
  const indentLines = (snippet: string): string =>
    snippet.trim().split('\n').map(line => ` ${line}`).join('\n');

  let markdown = `# ${section.title}\n\n`;
  if (section.text) {
    markdown += `${section.text}\n\n`;
  }
  if (section.steps) {
    for (const step of section.steps) {
      if (step.text) {
        markdown += `${step.text}\n\n`;
      }
      if (step.dir?.create) {
        markdown += ` mkdir -p ${step.dir.path}\n\n`;
      }
      if (step.file) {
        markdown += ` cp ${step.file.src} ${step.file.dest}\n\n`;
      }
      if (step.command) {
        // Indent every line, not only the first one.
        markdown += indentLines(step.command) + '\n\n';
      }
      if (step.results) {
        for (const result of step.results) {
          markdown += `${result.text}\n\n`;
          if (result.code) {
            markdown += indentLines(result.code) + '\n\n';
          }
        }
      }
    }
  }
  return markdown;
}
/**
 * Builds a fenced ```diff block describing the change from `oldContent` to
 * `newContent`, or returns `null` when there is nothing worth showing.
 *
 * Line endings are normalized to `\n` first. The patch is produced with
 * `Diff.createPatch` (2 lines of context) and then filtered: patch headers and
 * hunk markers are dropped, and a removed line whose next added line is equal
 * after trimming is treated as "no effective change" and skipped.
 *
 * @param filePath   path printed as the first line inside the diff block
 * @param oldContent previous file content
 * @param newContent new file content
 * @returns the markdown diff block (followed by a blank line), or `null` when
 *          the normalized contents are equal or no lines survive filtering
 */
function formatMinimalDiff(filePath: string, oldContent: string, newContent: string): string | null {
// Normalize line endings in both inputs
const normalize = (str: string) => str.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
const normalizedOld = normalize(oldContent);
const normalizedNew = normalize(newContent);
if (normalizedOld === normalizedNew) {
return null;
}
// Using context: 2 to show some surrounding lines
const patch = Diff.createPatch(filePath, normalizedOld, normalizedNew, '', '', { context: 2 });
const patchLines = patch.split('\n');
const effectiveChangeLines: string[] = [];
let i = 0;
while (i < patchLines.length) {
const line = patchLines[i];
// Skip standard patch headers and hunk metadata
// NOTE(review): this is a prefix test on every patch line, so a removed line
// whose content begins with "--" or an added line whose content begins with
// "++" would presumably also match and be dropped - confirm against the
// patch format produced by Diff.createPatch.
if (line.startsWith('---') || line.startsWith('+++') || line.startsWith('@@')) {
i++;
continue;
}
// Check for identical remove/add pairs (which means no effective change for these two lines)
if (line.startsWith('-')) {
let nextDiffLineIndex = i + 1;
// Skip empty lines AND "No newline" markers AND context lines to find the next actual diff line
while (nextDiffLineIndex < patchLines.length &&
(patchLines[nextDiffLineIndex].trim() === '' ||
patchLines[nextDiffLineIndex].startsWith('\\') ||
patchLines[nextDiffLineIndex].startsWith(' '))) {
nextDiffLineIndex++;
}
if (nextDiffLineIndex < patchLines.length && patchLines[nextDiffLineIndex].startsWith('+')) {
// Compare without the +/- marker and without surrounding whitespace, so
// changes that only touch leading/trailing whitespace count as identical.
const removedText = line.substring(1).trim();
const addedText = patchLines[nextDiffLineIndex].substring(1).trim();
if (removedText === addedText) {
// Advance i past the current line, any skipped empty lines, and the matched added line
// (context lines skipped by the scan above are dropped from the output as well)
i = nextDiffLineIndex + 1;
continue;
}
}
}
// If the line starts with +, -, or space (context), it's a line to be included
if (line.startsWith('+') || line.startsWith('-') || line.startsWith(' ')) {
effectiveChangeLines.push(line);
}
i++;
}
// Emit the block only when at least one line survived the filtering.
if (effectiveChangeLines.length > 0) {
return `\`\`\`diff\n${filePath}\n${effectiveChangeLines.join('\n')}\n\`\`\`\n\n`;
}
return null;
}
/**
 * Renders the README markdown for one section folder, including diffs against
 * the files that already exist in that folder.
 *
 * The function first snapshots every readable file under `sectionWorkingDir`
 * into an in-memory map (relative path -> content). It then walks the steps:
 * a `file` step whose destination is already in the map is rendered as a
 * change (optional diff plus `cp` command), otherwise as a new file (`cp`
 * command plus the file content). After each file step the map is updated so
 * later steps diff against the newest content.
 *
 * @param section            section to render
 * @param projectRoot        directory that `file.src` paths are resolved against
 * @param sectionWorkingDir  folder whose current files form the "before" state
 * @param walkthroughTargets targets from the walkthrough; only the FIRST
 *                           entry's `onChange` / `newFiles` options are read
 * @returns the markdown text, starting with a `# <title>` heading
 */
function generateRichSectionMarkdown(
section: Section,
projectRoot: string,
sectionWorkingDir: string,
walkthroughTargets: WalkthroughData['targets']
): string {
let markdown = `# ${section.title}\n\n`;
if (section.text) {
markdown += `${section.text}\n\n`;
}
// Initialize section's virtual file state from the actual files in sectionWorkingDir
const sectionVirtualFileState = new Map();
if (fs.existsSync(sectionWorkingDir)) {
// Depth-first walk; keys are paths relative to sectionWorkingDir.
const readFilesRecursively = (dir: string) => {
const entries = fs.readdirSync(dir, { withFileTypes: true });
for (const entry of entries) {
const fullPath = path.join(dir, entry.name);
const relativePath = path.relative(sectionWorkingDir, fullPath);
if (entry.isDirectory()) {
readFilesRecursively(fullPath);
} else {
try {
const content = fs.readFileSync(fullPath, 'utf8');
sectionVirtualFileState.set(relativePath, content);
} catch (error) {
// Unreadable files are left out of the snapshot; a later step writing
// to the same path is then rendered as a new file.
console.warn(`Warning: Could not read file ${fullPath} for section README state`);
}
}
}
};
readFilesRecursively(sectionWorkingDir);
}
if (section.steps) {
for (const step of section.steps) {
if (step.text) {
markdown += `${step.text}\n\n`;
}
if (step.dir?.create) {
markdown += ` mkdir -p ${step.dir.path}\n\n`;
}
if (step.file) {
const srcAbsolutePath = path.resolve(projectRoot, step.file.src);
// Normalized so the key matches the relative paths recorded in the snapshot.
const destRelativePath = path.normalize(step.file.dest);
let newContent: string;
try {
newContent = fs.readFileSync(srcAbsolutePath, 'utf8');
} catch (error: any) {
console.warn(`Warning: Could not read source file ${srcAbsolutePath} for step: ${step.text || 'Unnamed step'}`);
// Skips the rest of this step, including its command and results.
continue;
}
const isExistingVirtualFile = sectionVirtualFileState.has(destRelativePath);
const oldContent = isExistingVirtualFile ? sectionVirtualFileState.get(destRelativePath)! : '';
if (isExistingVirtualFile) {
// File is being changed/overwritten
// Diffs are opt-in: shown only when onChange.diff is exactly true.
const shouldDiff = walkthroughTargets?.[0]?.onChange?.diff === true;
let diffPrintedThisStep = false;
if (shouldDiff && oldContent !== newContent) {
const diffOutput = formatMinimalDiff(destRelativePath, oldContent, newContent);
if (diffOutput) {
markdown += diffOutput;
diffPrintedThisStep = true;
}
}
// The cp command is opt-out: shown unless onChange.cp is exactly false.
const showCp = walkthroughTargets?.[0]?.onChange?.cp !== false;
if (showCp) {
const cpCommand = `cp ${step.file.src} ${step.file.dest}`;
if (diffPrintedThisStep) {
// A diff was shown: offer the cp command under a "skip this step" label.
markdown += `\nskip this step
\n\n`;
markdown += ` ${cpCommand}\n\n`;
markdown += ` \n\n`;
} else {
markdown += ` ${cpCommand}\n\n`;
// Add "show file" details block
// Fence language is the source file's extension without the dot.
let lang = path.extname(step.file.src).substring(1);
// .baml files are fenced as rust (presumably for syntax highlighting - confirm).
if (lang === 'baml') {
lang = 'rust';
}
markdown += `\nshow file
\n\n`;
markdown += `\`\`\`${lang}\n`;
markdown += `// ${step.file.src}\n`;
markdown += `${newContent.trim()}\n`;
markdown += `\`\`\`\n\n`;
markdown += ` \n\n`;
}
}
} else {
// New file
// NOTE(review): only newFiles.cp is consulted; newFiles.cat is not read in
// this function, and the file content is always printed when cp is shown.
const showCpForNew = walkthroughTargets?.[0]?.newFiles?.cp !== false;
if (showCpForNew) {
const cpCommand = `cp ${step.file.src} ${step.file.dest}`;
markdown += ` ${cpCommand}\n\n`;
// Add "show file" details block
let lang = path.extname(step.file.src).substring(1);
if (lang === 'baml') {
lang = 'rust';
}
markdown += `\nshow file
\n\n`;
markdown += `\`\`\`${lang}\n`;
markdown += `// ${step.file.src}\n`;
markdown += `${newContent.trim()}\n`;
markdown += `\`\`\`\n\n`;
markdown += ` \n\n`;
}
}
// Record the new content so later steps in this section diff against it.
sectionVirtualFileState.set(destRelativePath, newContent);
}
if (step.command) {
// Every line of the command gets the indent prefix.
markdown += step.command.trim().split('\n').map(line => ` ${line}`).join('\n') + '\n\n';
}
if (step.results) {
for (const result of step.results) {
markdown += `${result.text}\n\n`;
if (result.code) {
markdown += result.code.trim().split('\n').map(line => ` ${line}`).join('\n') + '\n\n';
}
}
}
}
}
return markdown;
}
export const cli = (argv: string[]) => {
if (argv.includes("--help") || argv.includes("-h")) {
console.log(`
USAGE:
walkthroughgen generate [options]
OPTIONS:
--help, -h Show help
generate Generate markdown from YAML file
`);
return;
}
if (argv[0] === "generate") {
if (!argv[1]) {
console.error("Error: YAML file path is required for 'generate' command.");
console.log("Usage: walkthroughgen generate ");
return;
}
const yamlPath = argv[1];
let yamlContent;
try {
yamlContent = fs.readFileSync(yamlPath, 'utf8');
} catch (error: any) {
console.error(`Error: Could not read YAML file at '${yamlPath}'.`);
console.error(error.message);
return;
}
let data: WalkthroughData;
try {
data = yaml.load(yamlContent) as WalkthroughData;
} catch (error: any) {
console.error(`Error: Could not parse YAML content from '${yamlPath}'.`);
console.error(error.message);
return;
}
if (!data || typeof data.title !== 'string' || typeof data.text !== 'string') {
console.error(`Error: Invalid YAML structure in '${yamlPath}'. Missing required 'title' or 'text' fields.`);
return;
}
// Track virtual file state for diff generation
const projectRoot = path.dirname(yamlPath);
const virtualFileState = new Map();
// Process folders target first
if (data.targets) {
for (const target of data.targets) {
// Ensure target.folders is an object with a path property
if (target.folders && typeof target.folders === 'object' && target.folders.path) {
const currentFoldersTarget = target.folders; // Assign to a new const for type narrowing
const foldersBasePath = path.join(path.dirname(yamlPath), currentFoldersTarget.path);
console.log('Creating folders base path:', foldersBasePath);
fs.mkdirSync(foldersBasePath, { recursive: true });
// Create a temporary working directory to build up state
const workingDirName = `.tmp-working-${Date.now()}`;
const workingDir = path.join(foldersBasePath, workingDirName);
console.log('Creating working directory:', workingDir);
fs.mkdirSync(workingDir, { recursive: true });
try {
// Create section folders and build up working state
if (data.sections) {
let visibleSectionIndex = 0; // Counter for non-skipped sections
data.sections.forEach((section, originalIndex) => {
const baseName = getSectionBaseName(section);
// For logging, use original index to be clear about which section from YAML it is
const logSectionIdentifier = `${String(originalIndex).padStart(2, '0')}-${baseName}`;
console.log('Processing section:', logSectionIdentifier, 'with name:', section.name);
const shouldSkip = currentFoldersTarget.skip?.includes(section.name || '');
let sectionPathForApplySteps: string | null = null;
if (!shouldSkip) {
// Use visibleSectionIndex for the actual folder name
const sectionFolderName = `${String(visibleSectionIndex).padStart(2, '0')}-${baseName}`;
const sectionPath = path.join(foldersBasePath, sectionFolderName);
console.log('Creating section folder:', sectionPath);
fs.mkdirSync(sectionPath, { recursive: true });
// Copy current working state to section folder
if (fs.existsSync(workingDir) && fs.readdirSync(workingDir).length > 0) {
copyDirectory(workingDir, sectionPath);
}
// Generate and write section README
const sectionMarkdown = generateRichSectionMarkdown(section, projectRoot, sectionPath, data.targets);
fs.writeFileSync(path.join(sectionPath, 'README.md'), sectionMarkdown);
sectionPathForApplySteps = sectionPath;
visibleSectionIndex++; // Increment only for sections that get a folder
}
// Apply steps to working directory
applyStepsToWorkingDir(section.steps, projectRoot, workingDir, sectionPathForApplySteps);
});
// Create final directory if specified
if (currentFoldersTarget.final?.dirName) {
const finalDirPath = path.join(foldersBasePath, currentFoldersTarget.final.dirName);
fs.mkdirSync(finalDirPath, { recursive: true });
copyDirectory(workingDir, finalDirPath);
// Optional: Generate cumulative README for final directory
const finalReadme = data.sections
.filter(s => !currentFoldersTarget.skip?.includes(s.name || ''))
.map(s => generateSectionMarkdown(s))
.join('\n');
fs.writeFileSync(path.join(finalDirPath, 'README.md'), finalReadme);
}
}
} finally {
// Clean up working directory
if (fs.existsSync(workingDir)) {
fs.rmSync(workingDir, { recursive: true, force: true });
}
}
}
}
}
let markdown = `# ${data.title}\n\n${data.text}\n\n`;
if (data.sections) {
for (const section of data.sections) {
markdown += `## ${section.title}\n\n`;
if (section.text) {
markdown += `${section.text}\n\n`;
}
if (section.steps) {
for (const step of section.steps) {
if (step.text) { // Only add step.text if it exists
markdown += `${step.text}\n\n`;
}
if (step.file) {
const srcAbsolutePath = path.resolve(projectRoot, step.file.src);
const destRelativePath = path.normalize(step.file.dest);
let newContent: string;
try {
newContent = fs.readFileSync(srcAbsolutePath, 'utf8');
} catch (error: any) {
console.warn(`Warning: Could not read source file ${srcAbsolutePath} for step: ${step.text || 'Unnamed step'}`);
continue;
}
const isExistingVirtualFile = virtualFileState.has(destRelativePath);
const oldContent = isExistingVirtualFile ? virtualFileState.get(destRelativePath)! : '';
if (isExistingVirtualFile) {
// File is being changed/overwritten
const shouldDiff = data.targets?.[0]?.onChange?.diff === true;
let diffPrintedThisStep = false;
if (shouldDiff && oldContent !== newContent) {
const diffOutput = formatMinimalDiff(destRelativePath, oldContent, newContent);
if (diffOutput) {
markdown += diffOutput;
diffPrintedThisStep = true;
}
}
const showCp = data.targets?.[0]?.onChange?.cp !== false;
if (showCp) {
const cpCommand = `cp ${step.file.src} ${step.file.dest}`;
if (diffPrintedThisStep) {
markdown += `\nskip this step
\n\n`;
markdown += ` ${cpCommand}\n\n`;
markdown += ` \n\n`;
} else {
markdown += ` ${cpCommand}\n\n`;
// Add "show file" details block
let lang = path.extname(step.file.src).substring(1);
if (lang === 'baml') {
lang = 'rust';
}
markdown += `\nshow file
\n\n`;
markdown += `\`\`\`${lang}\n`;
markdown += `// ${step.file.src}\n`;
markdown += `${newContent.trim()}\n`;
markdown += `\`\`\`\n\n`;
markdown += ` \n\n`;
}
}
} else {
// New file
const showCpForNew = data.targets?.[0]?.newFiles?.cp !== false;
if (showCpForNew) {
const cpCommand = `cp ${step.file.src} ${step.file.dest}`;
markdown += ` ${cpCommand}\n\n`;
// Add "show file" details block
let lang = path.extname(step.file.src).substring(1);
if (lang === 'baml') {
lang = 'rust';
}
markdown += `\nshow file
\n\n`;
markdown += `\`\`\`${lang}\n`;
markdown += `// ${step.file.src}\n`;
markdown += `${newContent.trim()}\n`;
markdown += `\`\`\`\n\n`;
markdown += ` \n\n`;
}
}
virtualFileState.set(destRelativePath, newContent);
}
if (step.command) { // Always show commands in markdown
let commandLine = ` ${step.command.trim()}`;
markdown += commandLine;
markdown += "\n\n";
}
if (step.results) {
for (const result of step.results) {
markdown += `${result.text}\n\n`;
if (result.code) {
markdown += result.code.trim().split('\n').map(line => ` ${line}`).join('\n') + '\n\n';
}
}
}
}
}
}
}
const outputPath = data.targets?.[0]?.markdown
? path.join(path.dirname(yamlPath), data.targets[0].markdown)
: path.join(path.dirname(yamlPath), 'walkthrough.md');
try {
fs.mkdirSync(path.dirname(outputPath), { recursive: true });
fs.writeFileSync(outputPath, markdown);
console.log(`Successfully generated walkthrough to ${outputPath}`);
} catch (error: any) {
console.error(`Error: Could not write markdown file to '${outputPath}'.`);
console.error(error.message);
return;
}
return;
}
console.log("Unknown command. Available commands: generate. Use --help for more info.");
};
================================================
FILE: packages/walkthroughgen/src/index.ts
================================================
import { cli } from "./cli";
/**
 * Process entry point: forwards the command-line arguments (minus the node
 * binary and script path) to `cli`.
 */
const main = async () => {
  cli(process.argv.slice(2));
};

// `cli` runs inside an async function, so a synchronous throw surfaces here as
// a rejection. Log it and mark the run as failed so shells, Makefiles and CI
// see a non-zero exit status instead of a silent success.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
================================================
FILE: packages/walkthroughgen/test/e2e/test-e2e.ts
================================================
import * as fs from 'fs';
import * as path from 'path';
import { cli } from "../../src/cli";
import { withMockedConsole } from "../utils/console-mock";
import { withTmpDir } from "../utils/temp-dir";
/**
 * Argument-handling tests for the CLI entry point: help flags, missing or
 * unreadable YAML input, malformed or incomplete YAML, and unknown commands.
 * Every case runs `cli` with console output captured and checks the messages.
 */
describe("CLI basics", () => {
  // Invokes the CLI with `argv` and hands back whatever it printed.
  const runCaptured = (argv: string[]): string =>
    withMockedConsole(() => {
      cli(argv);
    });

  // Asserts that each fragment occurs somewhere in the captured output.
  const expectAll = (captured: string, fragments: string[]): void => {
    fragments.forEach((fragment) => expect(captured).toContain(fragment));
  };

  // Both spellings of the help flag must print the same usage banner.
  const helpFragments = ["USAGE:", "OPTIONS:", "--help, -h"];

  it("should handle --help flag", () => {
    expectAll(runCaptured(["--help"]), helpFragments);
  });

  it("should handle -h flag", () => {
    expectAll(runCaptured(["-h"]), helpFragments);
  });

  it("should show error for missing yaml file path", () => {
    expect(runCaptured(["generate"])).toContain("Error: YAML file path is required");
  });

  it("should show error for non-existent yaml file", () => {
    expect(runCaptured(["generate", "non-existent.yaml"])).toContain("Error: Could not read YAML file");
  });

  it("should show error for invalid yaml content", () => {
    withTmpDir((scratch: string) => {
      const yamlFile = path.join(scratch, 'invalid.yaml');
      fs.writeFileSync(yamlFile, `invalid: yaml: content: [}`);

      const captured = runCaptured(["generate", path.join(scratch, "invalid.yaml")]);
      expect(captured).toContain("Error: Could not parse YAML content");
    });
  });

  it("should show error for missing required fields", () => {
    withTmpDir((scratch: string) => {
      const yamlFile = path.join(scratch, 'missing-fields.yaml');
      fs.writeFileSync(yamlFile, `some_field: "some value"`);

      const captured = runCaptured(["generate", path.join(scratch, "missing-fields.yaml")]);
      expectAll(captured, [
        "Error: Invalid YAML structure",
        "Missing required 'title' or 'text' fields",
      ]);
    });
  });

  it("should show unknown command message", () => {
    expectAll(runCaptured(["unknown"]), [
      "Unknown command",
      "Available commands: generate",
    ]);
  });
});
// Markdown-target tests: each case writes a walkthrough.yaml into a throwaway
// directory, runs `generate` with console output captured, and inspects the
// markdown file that was written.
describe("CLI generate basic markdown", () => {
// Title and text only, no targets: output is expected at walkthrough.md beside the YAML.
it("should generate basic markdown", () => {
withTmpDir((tempDir: string) => {
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"`
);
const output = withMockedConsole(() => {
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
});
expect(fs.existsSync(path.join(tempDir, 'walkthrough.md'))).toBe(true);
const content = fs.readFileSync(path.join(tempDir, 'walkthrough.md'), 'utf8');
expect(content).toContain("# setting up a typescript cli");
expect(content).toContain("this is a walkthrough for setting up a typescript cli");
expect(output).toContain("Successfully generated walkthrough");
});
});
// A section contributes a level-two heading followed by its text.
it("should generate markdown with a section", () => {
withTmpDir((tempDir: string) => {
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"
sections:
- title: "Installation"
text: "First, let's install the necessary dependencies"`
);
const output = withMockedConsole(() => {
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
});
expect(fs.existsSync(path.join(tempDir, 'walkthrough.md'))).toBe(true);
const content = fs.readFileSync(path.join(tempDir, 'walkthrough.md'), 'utf8');
expect(content).toContain("# setting up a typescript cli");
expect(content).toContain("this is a walkthrough for setting up a typescript cli");
expect(content).toContain("## Installation");
expect(content).toContain("First, let's install the necessary dependencies");
expect(output).toContain("Successfully generated walkthrough");
});
});
// Full pipeline for one section: a file step, a command step and a result
// block, written to the path named by the markdown target.
it("should generate markdown with sections and steps", () => {
withTmpDir((tempDir: string) => {
fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "setting up a typescript cli"
text: "this is a walkthrough for setting up a typescript cli"
targets:
- markdown: "./build/walkthrough.md"
onChange:
diff: true
cp: true
sections:
- name: setup
title: "Initial Setup"
steps:
- text: "Create package.json"
file: {src: ./walkthrough/00-package.json, dest: package.json}
- text: "Install dependencies"
command: |
npm install
results:
- text: "You should see packages being installed"
code: |
added 123 packages`
);
// Source file referenced by the "Create package.json" step above.
fs.writeFileSync(
path.join(tempDir, 'walkthrough/00-package.json'),
`{
"name": "walkthroughgen",
"version": "1.0.0",
"description": "A CLI tool for generating walkthroughs",
"dependencies": {
"typescript": "^5.0.0"
}
}`
);
const output = withMockedConsole(() => {
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
});
expect(fs.existsSync(path.join(tempDir, 'build/walkthrough.md'))).toBe(true);
// CRLF is normalised to LF so the comparison does not depend on line endings.
const content = fs.readFileSync(path.join(tempDir, 'build/walkthrough.md'), 'utf8').replace(/\r\n/g, '\n');
expect(content).toContain(`
# setting up a typescript cli
this is a walkthrough for setting up a typescript cli
## Initial Setup
Create package.json
cp ./walkthrough/00-package.json package.json
show file
\`\`\`json
// ./walkthrough/00-package.json
{
"name": "walkthroughgen",
"version": "1.0.0",
"description": "A CLI tool for generating walkthroughs",
"dependencies": {
"typescript": "^5.0.0"
}
}
\`\`\`
Install dependencies
npm install
You should see packages being installed
added 123 packages
`.trim());
expect(output).toContain("Successfully generated walkthrough");
});
});
});
/**
 * End-to-end check against the bundled TypeScript example: the example's
 * walkthrough.yaml and walkthrough/ sources are copied into a scratch
 * directory, `generate` is run there, and the produced markdown is inspected.
 */
describe("CLI generate from example", () => {
  it("should generate markdown from the typescript example", () => {
    withTmpDir((tempDir: string) => {
      const exampleRoot = path.resolve(__dirname, '../../examples/typescript');
      const yamlCopy = path.join(tempDir, 'walkthrough.yaml');

      // Stage the YAML and its source files side by side in the scratch directory.
      fs.copyFileSync(path.join(exampleRoot, 'walkthrough.yaml'), yamlCopy);
      fs.cpSync(path.join(exampleRoot, 'walkthrough'), path.join(tempDir, 'walkthrough'), { recursive: true });

      const captured = withMockedConsole(() => {
        cli(["generate", yamlCopy]);
      });

      const generatedPath = path.join(tempDir, 'build/walkthrough.md');
      expect(fs.existsSync(generatedPath)).toBe(true);
      expect(captured).toContain("Successfully generated walkthrough");

      // Normalise line endings before matching on content.
      const generated = fs.readFileSync(generatedPath, 'utf8').replace(/\r\n/g, '\n');
      [
        "# setting up a typescript cli",
        "## Copy initial files",
        "cp ./walkthrough/00-package.json package.json",
      ].forEach((needle) => expect(generated).toContain(needle));
    });
  });
});
// Diff rendering for the markdown target: the same destination is written by
// two sections, so the second write is an overwrite and, with onChange.diff
// enabled, is expected to appear as a diff rather than a full file listing.
describe("CLI generate with diffs", () => {
it("should show diffs when files are overwritten", () => {
withTmpDir((tempDir: string) => {
fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
// Create initial package.json
fs.writeFileSync(
path.join(tempDir, 'walkthrough/v1-package.json'),
`{
"name": "example",
"version": "1.0.0",
"dependencies": {
"typescript": "^5.0.0"
}
}`
);
// Create updated package.json with a new dependency
fs.writeFileSync(
path.join(tempDir, 'walkthrough/v2-package.json'),
`{
"name": "example",
"version": "1.0.0",
"dependencies": {
"typescript": "^5.0.0",
"express": "^4.18.0"
}
}`
);
// Create walkthrough.yaml that updates package.json
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "Test Diff Generation"
text: "Testing diff generation for file updates"
targets:
- markdown: "./walkthrough.md"
onChange:
diff: true
cp: true
sections:
- title: "Initial Setup"
steps:
- text: "Create initial package.json"
file: {src: ./walkthrough/v1-package.json, dest: package.json}
- title: "Add Express"
steps:
- text: "Add express dependency"
file: {src: ./walkthrough/v2-package.json, dest: package.json}`
);
const output = withMockedConsole(() => {
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
});
expect(fs.existsSync(path.join(tempDir, 'walkthrough.md'))).toBe(true);
// CRLF is normalised to LF so the literal comparisons below are platform independent.
const content = fs.readFileSync(path.join(tempDir, 'walkthrough.md'), 'utf8').replace(/\r\n/g, '\n');
// First file copy should not have a diff (it's new)
expect(content).toContain("Create initial package.json");
expect(content).toContain("cp ./walkthrough/v1-package.json package.json");
expect(content).toContain(`
show file
\`\`\`json
// ./walkthrough/v1-package.json
{
"name": "example",
"version": "1.0.0",
"dependencies": {
"typescript": "^5.0.0"
}
}
\`\`\`
`);
// Second file copy should have a diff (it's an update)
expect(content).toContain("Add express dependency");
expect(content).toContain("```diff\npackage.json\n \"version\": \"1.0.0\",\n \"dependencies\": {\n- \"typescript\": \"^5.0.0\"\n+ \"typescript\": \"^5.0.0\",\n+ \"express\": \"^4.18.0\"\n }\n }");
// When a diff was printed, the cp command is expected inside a "skip this step" block.
expect(content).toContain(`
skip this step
cp ./walkthrough/v2-package.json package.json
`);
expect(output).toContain("Successfully generated walkthrough");
});
});
});
describe("CLI generate with folders target", () => {
// Minimal folders target: checks only that the configured base path is created
// and that generation still reports success.
it("should create base folders directory", () => {
withTmpDir((tempDir: string) => {
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "Test Folders"
text: "Testing folders target"
targets:
- folders: { path: "./build/by-section" }
sections:
- title: "First Section"
text: "First section text"`
);
const output = withMockedConsole(() => {
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
});
expect(fs.existsSync(path.join(tempDir, 'build/by-section'))).toBe(true);
expect(output).toContain("Successfully generated walkthrough");
});
});
// A named section is expected to get a folder called "<two-digit index>-<name>"
// under the folders base path, containing a README with its title and text.
it("should create first section folder with README", () => {
withTmpDir((tempDir: string) => {
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "Test Folders"
text: "Testing folders target"
targets:
- folders: { path: "./build/by-section" }
sections:
- name: first-section
title: "First Section"
text: "First section text"`
);
// Console output is captured only to keep the test run quiet; it is not asserted on.
const output = withMockedConsole(() => {
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
});
const sectionPath = path.join(tempDir, 'build/by-section/00-first-section');
expect(fs.existsSync(sectionPath)).toBe(true);
expect(fs.existsSync(path.join(sectionPath, 'README.md'))).toBe(true);
// Check README content
const readmeContent = fs.readFileSync(path.join(sectionPath, 'README.md'), 'utf8');
expect(readmeContent).toContain("# First Section");
expect(readmeContent).toContain("First section text");
});
});
// Verifies what a section folder holds when the section itself adds a file:
// the step's source file is available under the section folder, but the
// destination file is absent because the folder is a snapshot taken before
// the section's own steps are applied.
it("should copy files to the section's working directory", () => {
withTmpDir((tempDir: string) => {
// Create source file
fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
fs.writeFileSync(
path.join(tempDir, 'walkthrough/file.ts'),
'console.log("hello");'
);
// Create walkthrough.yaml
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "Test Folders"
text: "Testing folders target"
targets:
- folders: { path: "./build/by-section" }
sections:
- name: first-section
title: "First Section"
text: "First section text"
steps:
- text: "Add a file"
file: {src: ./walkthrough/file.ts, dest: src/file.ts}`
);
// Console output is captured only to keep the test run quiet; it is not asserted on.
const output = withMockedConsole(() => {
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
});
// Check source file was copied to section's walkthrough directory
const sectionPath = path.join(tempDir, 'build/by-section/00-first-section');
expect(fs.existsSync(path.join(sectionPath, 'walkthrough/file.ts'))).toBe(true);
// Check file was NOT copied to its destination within the section
// (section folders only contain state BEFORE their own steps)
expect(fs.existsSync(path.join(sectionPath, 'src/file.ts'))).toBe(false);
// Check README includes the step
const readmeContent = fs.readFileSync(path.join(sectionPath, 'README.md'), 'utf8');
expect(readmeContent).toContain("Add a file");
expect(readmeContent).toContain("cp ./walkthrough/file.ts src/file.ts");
});
});
// Two consecutive sections each add one file. Each section folder must hold
// the accumulated result of all earlier sections and nothing from its own steps.
it("should include files from previous sections", () => {
withTmpDir((tempDir: string) => {
// Create source files
fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
fs.writeFileSync(
path.join(tempDir, 'walkthrough/file1.ts'),
'console.log("hello 1");'
);
fs.writeFileSync(
path.join(tempDir, 'walkthrough/file2.ts'),
'console.log("hello 2");'
);
// Create walkthrough.yaml with two sections
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "Test Folders"
text: "Testing folders target"
targets:
- folders: { path: "./build/by-section" }
sections:
- name: first-section
title: "First Section"
text: "First section text"
steps:
- text: "Add first file"
file: {src: ./walkthrough/file1.ts, dest: src/file1.ts}
- name: second-section
title: "Second Section"
text: "Second section text"
steps:
- text: "Add second file"
file: {src: ./walkthrough/file2.ts, dest: src/file2.ts}`
);
// Console output is captured only to keep the test run quiet; it is not asserted on.
const output = withMockedConsole(() => {
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
});
// Check first section does NOT have its own file
// (section folders only contain state BEFORE their own steps)
const firstSectionPath = path.join(tempDir, 'build/by-section/00-first-section');
expect(fs.existsSync(path.join(firstSectionPath, 'src/file1.ts'))).toBe(false);
// Check second section has first section's file but NOT its own file
const secondSectionPath = path.join(tempDir, 'build/by-section/01-second-section');
expect(fs.existsSync(path.join(secondSectionPath, 'src/file1.ts'))).toBe(true);
expect(fs.existsSync(path.join(secondSectionPath, 'src/file2.ts'))).toBe(false);
// Check READMEs
const firstReadme = fs.readFileSync(path.join(firstSectionPath, 'README.md'), 'utf8');
expect(firstReadme).toContain("Add first file");
expect(firstReadme).toContain("cp ./walkthrough/file1.ts src/file1.ts");
const secondReadme = fs.readFileSync(path.join(secondSectionPath, 'README.md'), 'utf8');
expect(secondReadme).toContain("Add second file");
expect(secondReadme).toContain("cp ./walkthrough/file2.ts src/file2.ts");
});
});
// Regression-style scenario covering a skipped section, directory creation, a
// file that is overwritten by a later section, and the `final` snapshot folder.
// The key property: a section folder must contain the file versions produced
// by EARLIER sections only, never content introduced by a later section.
it("should correctly generate section folders with dir creation and specific file content", () => {
withTmpDir((tempDir: string) => {
// --- Setup source files ---
fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
// package.json for hello-world section
fs.writeFileSync(
path.join(tempDir, 'walkthrough/00-package.json'),
JSON.stringify({ name: "hello-world-pkg", dependencies: {} }, null, 2)
);
// tsconfig.json for hello-world section
fs.writeFileSync(
path.join(tempDir, 'walkthrough/00-tsconfig.json'),
JSON.stringify({ compilerOptions: { target: "esnext" } }, null, 2)
);
// This is the content EXPECTED in hello-world/src/index.ts
const expectedHelloWorldIndexContent = 'console.log("hello, world!"); // Simple version';
// The YAML for hello-world section will point to this source file.
fs.writeFileSync(
path.join(tempDir, 'walkthrough/01-index.ts'), // As per user's YAML for hello-world
expectedHelloWorldIndexContent
);
// This is the content that the user sees INCORRECTLY appearing in hello-world/src/index.ts.
// This file won't be directly referenced by the hello-world section in this test's YAML.
// If this content appears, it means something is wrong with file sourcing or cumulative logic.
const cliIndexContent = 'import { cli } from "./cli"; cli(); // CLI version';
fs.writeFileSync(
path.join(tempDir, 'walkthrough/02-index.ts'), // A different file
cliIndexContent
);
const cliTSContent = 'export function cli() { console.log("cli"); }';
fs.writeFileSync(
path.join(tempDir, 'walkthrough/02-cli.ts'), // A different file
cliTSContent
);
// --- Setup walkthrough.yaml ---
const walkthroughYamlContent = `
title: "Test Folders Feature"
text: "Testing dir creation and file content isolation between sections."
targets:
- folders:
path: "./build/sections"
skip:
- "cleanup"
final:
dirName: "final"
sections:
- name: cleanup
title: "Cleanup Section"
steps:
- text: "Simulate cleanup (command is illustrative, not run by folders target)"
command: "rm -rf src/"
- name: hello-world
title: "Hello World Section"
steps:
- text: "Copy package.json"
file: {src: ./walkthrough/00-package.json, dest: package.json}
- text: "Copy tsconfig.json"
file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json}
- text: "Create src folder"
dir: {create: true, path: src}
- text: "Add simple hello world index.ts"
file: {src: ./walkthrough/01-index.ts, dest: src/index.ts} # Points to expectedHelloWorldIndexContent
- name: cli-version # A subsequent section
title: "CLI Version Section"
steps:
- text: "add a CLI"
file: {src: ./walkthrough/02-cli.ts, dest: src/cli.ts} # adds src/cli.ts
- text: "Update index.ts to CLI version"
file: {src: ./walkthrough/02-index.ts, dest: src/index.ts} # Overwrites src/index.ts
- name: runnable
title: "run the cli"
steps:
- text: "run the cli"
command: "npx tsx src/index.ts"
`;
fs.writeFileSync(path.join(tempDir, 'walkthrough.yaml'), walkthroughYamlContent);
// --- Run CLI ---
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
// --- Assertions ---
// Skipped sections do not consume a folder index, so hello-world is expected
// at 00 and cli-version at 01; 00-cleanup is the path that must NOT exist.
const cleanupSectionPath = path.join(tempDir, 'build/sections/00-cleanup');
const helloWorldSectionPath = path.join(tempDir, 'build/sections/00-hello-world');
const cliSectionPath = path.join(tempDir, 'build/sections/01-cli-version');
const finalSectionPath = path.join(tempDir, 'build/sections/final');
//
// Cleanup Section
//
// cleanup is listed under the folders target's `skip`, so no folder is generated for it
expect(fs.existsSync(cleanupSectionPath)).toBe(false);
//
// Hello World Section
//
// Assert hello-world section - this should have the results of the previous step (NOTHING)
expect(fs.existsSync(helloWorldSectionPath)).toBe(true);
// Check src/, package.json and tsconfig.json don't exist yet
expect(fs.existsSync(path.join(helloWorldSectionPath, 'src'))).toBe(false);
expect(fs.existsSync(path.join(helloWorldSectionPath, 'package.json'))).toBe(false);
expect(fs.existsSync(path.join(helloWorldSectionPath, 'tsconfig.json'))).toBe(false);
//
// CLI Section
//
// The cli section should contain the results of the hello-world section
const packageJSONPath = path.join(cliSectionPath, 'package.json');
const tsconfigJSONPath = path.join(cliSectionPath, 'tsconfig.json');
const indexTSPath = path.join(cliSectionPath, 'src/index.ts');
expect(fs.existsSync(packageJSONPath)).toBe(true);
expect(fs.existsSync(tsconfigJSONPath)).toBe(true);
expect(fs.existsSync(indexTSPath)).toBe(true);
const packageJSONContent = fs.readFileSync(packageJSONPath, 'utf8');
expect(packageJSONContent).toContain("hello-world-pkg");
const tsconfigJSONContent = fs.readFileSync(tsconfigJSONPath, 'utf8');
expect(tsconfigJSONContent).toContain("\"target\": \"esnext\"");
// index.ts here must still be the hello-world version, not the CLI version.
const indexTSContent = fs.readFileSync(indexTSPath, 'utf8');
expect(indexTSContent).toContain("console.log(\"hello, world!\");");
//
// Final Section
//
// the final folder, marked by "final: dirName: final" should contain all the files from the last section
expect(fs.existsSync(finalSectionPath)).toBe(true);
expect(fs.existsSync(path.join(finalSectionPath, 'src/index.ts'))).toBe(true);
expect(fs.existsSync(path.join(finalSectionPath, 'src/cli.ts'))).toBe(true);
expect(fs.existsSync(path.join(finalSectionPath, 'package.json'))).toBe(true);
expect(fs.existsSync(path.join(finalSectionPath, 'tsconfig.json'))).toBe(true);
// Verify index.ts calls the cli function
const finalIndexContent = fs.readFileSync(path.join(finalSectionPath, 'src/index.ts'), 'utf8');
expect(finalIndexContent).toContain(cliIndexContent);
const finalCliContent = fs.readFileSync(path.join(finalSectionPath, 'src/cli.ts'), 'utf8');
expect(finalCliContent).toContain(cliTSContent);
});
});
// Steps marked `incremental: true` are expected to have their command run for
// the folders target, so the files they create show up in LATER section
// folders and in the final folder, but not in the folder of the section that
// ran them.
it("should execute commands in the working directory for folders target", () => {
withTmpDir((tempDir: string) => {
// Create walkthrough.yaml
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "Test Command Execution in Folders"
text: "Testing command execution"
targets:
- folders:
path: "./build/cmd-test"
final:
dirName: "final-cmd"
sections:
- name: section-with-command
title: "Section with Command"
steps:
- text: "Create a file via command"
command: "echo 'command content' > command_file.txt"
incremental: true
- name: next-section
title: "Next Section"
steps:
- text: "Another step"
command: "echo 'another' > another_file.txt"
incremental: true`
);
// Run CLI
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
// Assertions
const firstSectionPath = path.join(tempDir, 'build/cmd-test/00-section-with-command');
const secondSectionPath = path.join(tempDir, 'build/cmd-test/01-next-section');
const finalPath = path.join(tempDir, 'build/cmd-test/final-cmd');
// First section should NOT have its own command's file
expect(fs.existsSync(path.join(firstSectionPath, 'command_file.txt'))).toBe(false);
// Second section SHOULD have first section's command's file
expect(fs.existsSync(path.join(secondSectionPath, 'command_file.txt'))).toBe(true);
// But should NOT have its own command's file
expect(fs.existsSync(path.join(secondSectionPath, 'another_file.txt'))).toBe(false);
// Final folder should have both files
expect(fs.existsSync(path.join(finalPath, 'command_file.txt'))).toBe(true);
expect(fs.existsSync(path.join(finalPath, 'another_file.txt'))).toBe(true);
// Check file contents
// (trimmed because `echo` appends a trailing newline)
const commandFileContent = fs.readFileSync(path.join(secondSectionPath, 'command_file.txt'), 'utf8').trim();
expect(commandFileContent).toBe('command content');
const finalAnotherFileContent = fs.readFileSync(path.join(finalPath, 'another_file.txt'), 'utf8').trim();
expect(finalAnotherFileContent).toBe('another');
});
});
// Contrasts the two targets' handling of commands: the markdown output lists
// every command, while the folders output only reflects commands whose step
// sets `incremental: true` (an omitted or false flag means "not executed").
it("should handle incremental commands correctly", () => {
withTmpDir((tempDir: string) => {
// Create walkthrough.yaml
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "Test Incremental Commands"
text: "Testing incremental command behavior"
targets:
- markdown: "./walkthrough.md"
- folders:
path: "./build/cmd-test"
final:
dirName: "final"
sections:
- name: section-with-commands
title: "Section with Commands"
steps:
- text: "Regular command (not executed in folders, shown in MD)"
command: "echo 'regular command' > regular.txt"
- text: "Incremental command (executed in folders, shown in MD)"
command: "echo 'incremental command' > incremental.txt"
incremental: true
- text: "Another regular command (not executed in folders, shown in MD)"
command: "echo 'another regular' > another_regular.txt"
incremental: false`
);
// Run CLI
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
// Check markdown output - ALL commands should be in markdown
const markdownContent = fs.readFileSync(path.join(tempDir, 'walkthrough.md'), 'utf8');
expect(markdownContent).toContain("echo 'regular command' > regular.txt");
expect(markdownContent).toContain("echo 'incremental command' > incremental.txt");
expect(markdownContent).toContain("echo 'another regular' > another_regular.txt");
// Check folders output - only incremental commands should have run
const finalPath = path.join(tempDir, 'build/cmd-test/final');
expect(fs.existsSync(path.join(finalPath, 'regular.txt'))).toBe(false);
expect(fs.existsSync(path.join(finalPath, 'incremental.txt'))).toBe(true);
expect(fs.existsSync(path.join(finalPath, 'another_regular.txt'))).toBe(false);
// Check file contents for incremental command
const incrementalContent = fs.readFileSync(path.join(finalPath, 'incremental.txt'), 'utf8').trim();
expect(incrementalContent).toBe('incremental command');
});
});
// Per-section READMEs for the folders target: a file added for the first time
// is expected to be rendered with a cp command plus a "show file" block, and a
// later overwrite of the same destination as a diff plus a "skip this step" block.
it("should generate section READMEs with diffs and show file blocks", () => {
withTmpDir((tempDir: string) => {
// Create source files
fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true });
fs.writeFileSync(
path.join(tempDir, 'walkthrough/v1-index.ts'),
'console.log("hello");'
);
fs.writeFileSync(
path.join(tempDir, 'walkthrough/v2-index.ts'),
'console.log("hello");\nconsole.log("world");'
);
// Create walkthrough.yaml
fs.writeFileSync(
path.join(tempDir, 'walkthrough.yaml'),
`title: "Test Section README Diffs"
text: "Testing section README diff generation"
targets:
- folders:
path: "./build/sections"
final:
dirName: "final"
onChange:
diff: true
cp: true
newFiles:
cat: false
cp: true
sections:
- name: first-section
title: "First Section"
text: "First section text"
steps:
- text: "Add initial index.ts"
file: {src: ./walkthrough/v1-index.ts, dest: src/index.ts}
- name: second-section
title: "Second Section"
text: "Second section text"
steps:
- text: "Update index.ts"
file: {src: ./walkthrough/v2-index.ts, dest: src/index.ts}`
);
// Run CLI
cli(["generate", path.join(tempDir, "walkthrough.yaml")]);
// Check first section README
const firstSectionPath = path.join(tempDir, 'build/sections/00-first-section');
const firstReadme = fs.readFileSync(path.join(firstSectionPath, 'README.md'), 'utf8');
expect(firstReadme).toContain("Add initial index.ts");
expect(firstReadme).toContain("cp ./walkthrough/v1-index.ts src/index.ts");
// NOTE(review): the next string literal contains a raw line break, which is not
// valid in a double-quoted string; markup may have been stripped from it — confirm
// the intended literal against the repository.
expect(firstReadme).toContain("\nshow file
");
expect(firstReadme).toContain("```ts\n// ./walkthrough/v1-index.ts");
expect(firstReadme).toContain('console.log("hello");');
// Check second section README
const secondSectionPath = path.join(tempDir, 'build/sections/01-second-section');
const secondReadme = fs.readFileSync(path.join(secondSectionPath, 'README.md'), 'utf8');
expect(secondReadme).toContain("Update index.ts");
expect(secondReadme).toContain("```diff\nsrc/index.ts\n+console.log(\"world\");");
// NOTE(review): same raw-line-break issue as the "show file" literal above — confirm.
expect(secondReadme).toContain("\nskip this step
");
expect(secondReadme).toContain("cp ./walkthrough/v2-index.ts src/index.ts");
});
});
});
================================================
FILE: packages/walkthroughgen/test/utils/console-mock.ts
================================================
/**
 * Runs `callback` while console.log and console.error are swapped for
 * recorders, and returns everything that was printed.
 *
 * Each call is recorded as one entry whose arguments are joined with a single
 * space; entries are joined with newlines in the order they were emitted. The
 * real console methods are reinstated even when `callback` throws.
 *
 * @param callback The function to execute while console is mocked
 * @returns The captured console output (both log and error messages)
 */
export const withMockedConsole = (callback: () => void): string => {
  const saved = { log: console.log, error: console.error };
  const entries: string[] = [];

  // One recorder serves both streams; their output is interleaved in call order.
  const record = (...parts: any[]): void => {
    entries.push(parts.join(" "));
  };

  console.log = record;
  console.error = record;
  try {
    callback();
  } finally {
    console.log = saved.log;
    console.error = saved.error;
  }

  return entries.join("\n");
};
================================================
FILE: packages/walkthroughgen/test/utils/temp-dir.ts
================================================
import { mkdtempSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';
/**
 * Creates a temporary directory, executes a function with that directory, then removes it.
 *
 * The directory is created next to this helper file (under `__dirname`) with
 * the prefix `.tmptest`, not in the operating system's temp location. It is
 * deleted recursively once `fn` returns or throws.
 *
 * @typeParam T Result type of `fn`
 * @param fn Callback that receives the absolute path of the new directory
 * @returns Whatever `fn` returns
 */
export function withTmpDir<T>(fn: (dir: string) => T): T {
  const dir = mkdtempSync(join(__dirname, '.tmptest'));
  try {
    return fn(dir);
  } finally {
    // `force` keeps cleanup from throwing if the callback already removed the directory.
    rmSync(dir, { recursive: true, force: true });
  }
}
================================================
FILE: packages/walkthroughgen/tsconfig.json
================================================
{
"compilerOptions": {
"target": "es2016",
"module": "commonjs",
"esModuleInterop": true,
"forceConsistentCasingInFileNames": true,
"strict": true,
"skipLibCheck": true
},
"exclude": ["node_modules", "dist", "**/walkthrough/**"]
}
================================================
FILE: workshops/.gitignore
================================================
baml_client/
================================================
FILE: workshops/.python-version
================================================
3.11
================================================
FILE: workshops/2025-05/.gitignore
================================================
build/
================================================
FILE: workshops/2025-05/Makefile
================================================
# clean: delete the build/ directory (build/ is also listed in this folder's .gitignore)
.PHONY: clean
clean:
rm -rf build/
# generate: wipe build/ first (depends on clean), then run the walkthroughgen
# entry point with tsx, passing it `generate walkthrough.yaml`.
# Paths are relative to this workshop directory.
.PHONY: generate
generate: clean
npm -C ../../packages/walkthroughgen/ \
exec tsx \
../../packages/walkthroughgen/src/index.ts \
generate walkthrough.yaml
================================================
FILE: workshops/2025-05/final/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/final/baml_src/agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the two outcomes that hand control back to a person.
type HumanTools = ClarificationRequest | DoneForNow
// Chosen when the model needs more input before it can continue.
// `intent` is a fixed literal, so it identifies which tool was selected.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Chosen when the model considers the requested work finished.
// `message` carries the summary that is shown to the user.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// DetermineNextStep: one LLM call that picks the agent's next action.
// Input:  `thread` - the conversation so far, already serialized to a string by the caller.
// Output: exactly one human tool or one calculator tool.
// The prompt asks the model to list its reasoning as bullets before emitting
// the structured output described by ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// A bare greeting contains no task, so the expected next step is to ask the
// user for more information.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
hello!
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// A single, fully specified multiplication should be routed to the multiply tool.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4?
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// The thread already contains every intermediate step of the request
// (3*4 -> 12, 12/2 -> 6, 6+12 -> 18), so no further tool call is needed:
// the model should finish and report the final value 18.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
a: 3
b: 4
12
a: 12
b: 2
6
a: 6
b: 12
18
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// The second operand is gibberish, so the model should ask for clarification
// instead of guessing a number.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and fe1iiaff10
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// The first request had an unreadable operand, the agent asked for
// clarification, and the user answered with 12. The model should now call
// multiply with the original first operand (3) and the clarified second
// operand (12).
// Each assertion label names the field it checks, so a failure report points
// at the right operand.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and FD*(#F&& ?
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
lets try 12 instead
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/final/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// OpenAI gpt-4o, no retry policy; key is read from OPENAI_API_KEY.
client CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
// OpenAI gpt-4o-mini, retried with the Exponential policy declared in this file.
client CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
// Anthropic Claude 3.5 Sonnet, no retry policy; key is read from ANTHROPIC_API_KEY.
client CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
// Anthropic Claude 3 Haiku, retried with the Constant policy declared in this file.
client CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads calls over the two smaller models.
client CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// NOTE(review): both entries are the same client, so the "fallback" only
// repeats CustomGPT4oMini - confirm whether a different second client was intended.
client OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds
strategy [CustomGPT4oMini, CustomGPT4oMini]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Constant: up to 3 retries with a fixed 200 ms delay (referenced by CustomHaiku).
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Exponential: up to 2 retries, starting at 300 ms and multiplying the delay
// by 1.5 each time, capped at 10000 ms (referenced by CustomGPT4oMini).
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/final/baml_src/generators.baml
================================================
// This helps you auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
// "../" therefore targets the directory that contains baml_src/
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/final/baml_src/tool_calculator.baml
================================================
// Union of every arithmetic tool the model may select as a next step.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Each tool has the same shape: a literal `intent` that identifies the
// operation, plus two numeric operands `a` and `b` (int or float).
// a + b
class AddTool {
intent "add"
a int | float
b int | float
}
// a - b
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// a * b
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// a / b
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/final/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"express": "^5.1.0",
"humanlayer": "^0.7.7",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/express": "^5.0.1",
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0",
"supertest": "^7.1.0"
}
}
================================================
FILE: workshops/2025-05/final/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's history. */
export interface Event {
  /** Kind of entry; values used in this project include "user_input", "tool_call", "tool_response" and "human_response". */
  type: string;
  /** Payload; either a primitive (text, number) or an object such as a tool call carrying an `intent`. */
  data: any;
}

/**
 * Ordered history of everything that happened in one agent conversation,
 * plus helpers to render it for the model and to inspect its current state.
 */
export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  /** Renders every event as a tagged block and joins the blocks with newlines. */
  serializeForLLM() {
    return this.events.map(e => this.serializeOneEvent(e)).join("\n");
  }

  /** Removes leading spaces and tabs from every line of `s`. */
  trimLeadingWhitespace(s: string) {
    return s.replace(/^[ \t]+/gm, '');
  }

  /**
   * Renders one event as
   *
   *   <tag>
   *   body
   *   </tag>
   *
   * where `tag` is the payload's `intent` when it has one, otherwise the event
   * type. Object payloads are rendered as `key: value` lines (without the
   * `intent` key); primitives and `null` are rendered as-is.
   */
  serializeOneEvent(e: Event) {
    const tag = e.data?.intent || e.type;
    // typeof null is 'object', so null is excluded explicitly before Object.keys
    const body = e.data === null || typeof e.data !== 'object'
      ? e.data
      : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n");
    return this.trimLeadingWhitespace(`
      <${tag}>
      ${body}
      </${tag}>
    `);
  }

  /**
   * True when the last event is a message addressed to the human
   * (a clarification request or a final answer). False for an empty thread.
   */
  awaitingHumanResponse(): boolean {
    const lastEvent = this.events[this.events.length - 1];
    return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
  }

  /**
   * True when the last event is a `divide` tool call, which is held back
   * until a human approves it. False for an empty thread.
   */
  awaitingHumanApproval(): boolean {
    const lastEvent = this.events[this.events.length - 1];
    return lastEvent?.data?.intent === 'divide';
  }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
/**
 * Executes one calculator tool call and records the outcome on the thread.
 *
 * The numeric result is logged and appended to `thread.events` as a
 * `tool_response` event. The thread is mutated in place and also returned.
 * Division is not guarded: a zero divisor follows plain JavaScript semantics
 * (Infinity or NaN).
 *
 * @param nextStep Tool call chosen by the model
 * @param thread   Thread to append the result to
 * @returns The same thread instance, with the result appended
 * @throws Error when `nextStep.intent` is not one of the calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Reachable only with untyped input (e.g. event data typed as `any`);
      // fail loudly instead of returning undefined to the caller.
      throw new Error(`unknown tool: ${(nextStep as { intent?: unknown }).intent}`);
  }

  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Drives the agent until it needs a human.
 *
 * Each iteration asks the model for the next step, appends it to the thread
 * as a `tool_call` event, and then either
 *  - returns the thread (final answer, clarification request, or a `divide`
 *    call that requires approval), or
 *  - executes the tool via `handleNextStep` and loops again.
 *
 * @param thread Thread to continue; it is mutated in place
 * @returns The thread, whose last event is the step that stopped the loop
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
      case "request_more_information":
        // response to human, return the thread
        return thread;
      case "divide":
        // divide is scary, return it for human approval
        return thread;
      case "add":
      case "subtract":
      case "multiply":
        thread = await handleNextStep(nextStep, thread);
        break;
    }
  }
}
================================================
FILE: workshops/2025-05/final/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: turns the process arguments into the first user
 * message, runs the agent, and keeps relaying the agent's requests to a human
 * until the agent reports `done_for_now`. Prints the final message and exits
 * with code 0; exits with code 1 when no message was given.
 */
export async function cli() {
  // argv[0] is node and argv[1] is the script, everything after is the message
  const words = process.argv.slice(2);
  if (words.length === 0) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // All arguments form one message, which becomes the thread's first event
  const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

  let current = await agentLoop(thread);
  let tail = current.events[current.events.length - 1];
  for (;;) {
    if (tail.data.intent === "done_for_now") {
      break;
    }
    // The agent stopped for a human: collect the reply and continue the loop
    const reply = await askHuman(tail);
    thread.events.push(reply);
    current = await agentLoop(thread);
    tail = current.events[current.events.length - 1];
  }

  // print the final result
  // optional - you could loop here too
  console.log(tail.data.message);
  process.exit(0);
}
/**
 * Obtains a human reply for the event the agent stopped on.
 *
 * Uses the email channel when HUMANLAYER_API_KEY is set; otherwise prompts on
 * the terminal with the event's `message`.
 *
 * @param lastEvent Event that made the agent loop return
 * @returns Event to append to the thread before resuming the loop
 */
async function askHuman(lastEvent: Event): Promise<Event> {
  if (process.env.HUMANLAYER_API_KEY) {
    return await askHumanEmail(lastEvent);
  }
  return await askHumanCLI(lastEvent.data.message);
}
/**
 * Prints `message` on the terminal and waits for one line of input.
 *
 * @param message Text shown to the user above the `> ` prompt
 * @returns A `human_response` event whose data is the typed answer
 */
async function askHumanCLI(message: string): Promise<Event> {
  const rl = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise<Event>((resolve) => {
    rl.question(`${message}\n> `, (answer: string) => {
      // Release stdin; otherwise every question leaves another open interface behind
      rl.close();
      resolve({ type: "human_response", data: answer });
    });
  });
}
/**
 * Contacts a human by email through HumanLayer and converts the reply into a
 * thread event. Blocks until the human answers.
 *
 * - `request_more_information`: sends the agent's message and returns the reply.
 * - `divide`: asks for approval; when approved, performs the division here and
 *   returns the result, otherwise returns the denial with the human's comment.
 *
 * NOTE(review): replies are recorded as `tool_response` here, while the
 * terminal path records them as `human_response` - confirm this is intended.
 *
 * @param lastEvent Event that made the agent loop return
 * @returns Event to append to the thread
 * @throws Error when HUMANLAYER_EMAIL is not set, or when the event's intent
 *         is neither `request_more_information` nor `divide`
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
  if (!process.env.HUMANLAYER_EMAIL) {
    throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
  }
  const hl = humanlayer({ //reads apiKey from env
    // name of this agent
    runId: "12fa-cli-agent",
    verbose: true,
    contactChannel: {
      // agent should request permission via email
      email: {
        address: process.env.HUMANLAYER_EMAIL,
        // custom email body - jinja
        template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
reply to this email to approve
{% endif %}`
      }
    }
  })

  if (lastEvent.data.intent === "request_more_information") {
    // fetch response synchronously - this will block until reply
    const response = await hl.fetchHumanResponse({
      spec: {
        msg: lastEvent.data.message
      }
    })
    return {
      "type": "tool_response",
      "data": response
    }
  }

  if (lastEvent.data.intent === "divide") {
    // fetch approval synchronously - this will block until reply
    const response = await hl.fetchHumanApproval({
      spec: {
        fn: "divide",
        kwargs: {
          a: lastEvent.data.a,
          b: lastEvent.data.b
        }
      }
    })

    if (response.approved) {
      const result = lastEvent.data.a / lastEvent.data.b;
      console.log("tool_response", result);
      return {
        "type": "tool_response",
        "data": result
      };
    } else {
      return {
        "type": "tool_response",
        "data": `user denied operation ${lastEvent.data.intent}
with feedback: ${response.comment}`
      };
    }
  }
  throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}
================================================
FILE: workshops/2025-05/final/src/index.ts
================================================
import { cli } from "./cli"
// Prints a greeting. Not called from main() in this file.
async function hello(): Promise<void> {
  console.log('hello, world!')
}

// Program entry point: hands control to the command-line interface.
async function main() {
  await cli()
}

// Any rejection from main() is reported through console.error.
main().catch(console.error)
================================================
FILE: workshops/2025-05/final/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
// Express application: parses JSON request bodies and sets the
// 'json spaces' option to 2 for JSON responses.
const app = express();
app.use(express.json());
app.set('json spaces', 2);
// Thread storage shared by every route below
const store = new ThreadStore();
// POST /thread - Start new thread
// Body: { message: string }. Runs the agent until it needs a human and returns
// the whole thread together with its id. Responds 400 when `message` is
// missing, not a string, or blank.
app.post('/thread', async (req, res) => {
    // The body comes from the network: validate before handing it to the model
    const message = req.body?.message;
    if (typeof message !== 'string' || message.trim() === '') {
        res.status(400).json({ error: "Invalid request: 'message' must be a non-empty string" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});
// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});
// Body sent to approve or deny a pending tool call; `comment` is optional feedback.
type ApprovalPayload = {
type: "approval";
approved: boolean;
comment?: string;
}
// Body sent to answer a question from the agent.
type ResponsePayload = {
type: "response";
response: string;
}
// Accepted request bodies for POST /thread/:id/response, discriminated by `type`.
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
// Accepts either a human answer (when the thread is waiting for a response) or
// an approval decision (when the thread is waiting for approval), then resumes
// the agent loop and returns the updated thread. Any other combination is a 400.
app.post('/thread/:id/response', async (req, res) => {
    const threadId = req.params.id;
    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;
    // The event the agent stopped on; for approvals this is the pending tool call
    const pendingEvent = thread.events[thread.events.length - 1];

    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval') {
        if (body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(pendingEvent.data, thread);
        } else {
            // push feedback onto the thread
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment}"`
            });
        }
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    // loop until stop event
    const updated = await agentLoop(thread);
    store.update(threadId, updated);

    const tail = updated.events[updated.events.length - 1];
    tail.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", tail);

    res.json(updated);
});
// Port comes from the PORT environment variable, falling back to 3000.
// NOTE(review): listen() is a top-level statement, so it also runs whenever
// another module imports `app` from this file.
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Exported so other modules can use the configured application.
export { app };
================================================
FILE: workshops/2025-05/final/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * Keeps threads in a Map held by the instance, keyed by a random UUID.
 */
export class ThreadStore {
    private threads: Map<string, Thread> = new Map();

    /**
     * Stores a thread under a newly generated id.
     * @returns The generated id
     */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }

    /** Returns the thread stored under `id`, or undefined when there is none. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Stores `thread` under `id`, replacing any previous value (creates the entry if absent). */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
================================================
FILE: workshops/2025-05/final/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/00-hello-world/README.md
================================================
# Chapter 0 - Hello World
Let's start with a basic TypeScript setup and a hello world program.
This guide is written in TypeScript (yes, a python version is coming soon)
There are many checkpoints between every file edit in the workshop steps,
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of nodejs and npm installed
You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine
brew install node@20
You should see the node version
node --version
Copy initial package.json
cp ./walkthrough/00-package.json package.json
show file
```json
// ./walkthrough/00-package.json
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
```
Install dependencies
npm install
Copy tsconfig.json
cp ./walkthrough/00-tsconfig.json tsconfig.json
show file
```json
// ./walkthrough/00-tsconfig.json
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
```
add .gitignore
cp ./walkthrough/00-.gitignore .gitignore
show file
```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```
Create src folder
mkdir -p src
Add a simple hello world index.ts
cp ./walkthrough/00-index.ts src/index.ts
show file
```ts
// ./walkthrough/00-index.ts
async function hello(): Promise {
console.log('hello, world!')
}
async function main() {
await hello()
}
main().catch(console.error)
```
Run it to verify
npx tsx src/index.ts
You should see:
hello, world!
================================================
FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-index.ts
================================================
// Prints the greeting used to verify the TypeScript setup.
async function hello(): Promise<void> {
  console.log('hello, world!')
}

// Program entry point.
async function main() {
  await hello()
}

// Any rejection from main() is reported through console.error.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/README.md
================================================
# Chapter 1 - CLI and Agent Loop
Now let's add BAML and create our first agent with a CLI interface.
First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.
npm install @boundaryml/baml
Initialize BAML
npx baml-cli init
Remove default resume.baml
rm baml_src/resume.baml
Add our starter agent, a single baml prompt that we'll build on
cp ./walkthrough/01-agent.baml baml_src/agent.baml
show file
```rust
// ./walkthrough/01-agent.baml
class DoneForNow {
intent "done_for_now"
message string
}
function DetermineNextStep(
thread: string
) -> DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
```
Generate BAML client code
npx baml-cli generate
Enable BAML logging for this section
export BAML_LOG=debug
Add the CLI interface
cp ./walkthrough/01-cli.ts src/cli.ts
show file
```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
const args = process.argv.slice(2);
if (args.length === 0) {
console.error("Error: Please provide a message as a command line argument");
process.exit(1);
}
// Join all arguments into a single message
const message = args.join(" ");
// Create a new thread with the user's message as the initial event
const thread = new Thread([{ type: "user_input", data: message }]);
// Run the agent loop with the thread
const result = await agentLoop(thread);
console.log(result);
}
```
Update index.ts to use the CLI
```diff
src/index.ts
+import { cli } from "./cli"
+
async function hello(): Promise {
console.log('hello, world!')
async function main() {
- await hello()
+ await cli()
}
```
skip this step
cp ./walkthrough/01-index.ts src/index.ts
Add the agent implementation
cp ./walkthrough/01-agent.ts src/agent.ts
show file
```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
return nextStep;
}
```
The BAML code is configured to use OPENAI_API_KEY by default
As you're testing, you can change the model / provider to something else
as you please
client "openai/gpt-4o"
[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)
For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini)
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.
If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key.
export OPENAI_API_KEY=...
Try it out
npx tsx src/index.ts hello
you should see a familiar response from the model
{
intent: 'done_for_now',
message: 'Hello! How can I assist you today?'
}
================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/src/index.ts
================================================
// Prints the greeting used to verify the TypeScript setup.
async function hello(): Promise<void> {
  console.log('hello, world!')
}

// Program entry point.
async function main() {
  await hello()
}

// Any rejection from main() is reported through console.error.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-agent.baml
================================================
// The only outcome available at this stage: the model answers the user.
// `intent` is a fixed literal; `message` is the text for the user.
class DoneForNow {
intent "done_for_now"
message string
}
// DetermineNextStep: one LLM call that decides what the agent does next.
// Input:  `thread` - the conversation so far, serialized to a string by the caller.
// Output: a DoneForNow object (the only option in this chapter).
function DetermineNextStep(
thread: string
) -> DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Smoke test: feeds a single user_input event, written as JSON, to the function.
// It declares no @@assert lines.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-agent.ts
================================================
import { b } from "../baml_client";
// tool call or a respond to human tool
// Derived from the generated client so it always matches what DetermineNextStep resolves to.
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
/** One entry in a thread's history: a kind plus an arbitrary payload. */
export interface Event {
  type: string;
  data: any;
}

/** Ordered history of the events in one agent conversation. */
export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  /** Renders the whole event list as a JSON string for the model. */
  serializeForLLM() {
    // can change this to whatever custom serialization you want to do, XML, etc
    // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    const { events } = this;
    return JSON.stringify(events);
  }
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
/**
 * Asks the model for the next step given the serialized thread.
 *
 * @param thread Conversation so far
 * @returns The step chosen by the model
 */
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
  return nextStep;
}
================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
/**
 * Command-line entry point: joins the process arguments into one user
 * message, runs a single agent turn on it and prints the result.
 * Exits with code 1 when no message was given.
 */
export async function cli() {
  // argv[0] is node and argv[1] is the script, everything after is the message
  const words = process.argv.slice(2);
  if (words.length === 0) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // The joined arguments become the thread's first (and only) event
  const firstEvent = { type: "user_input", data: words.join(" ") };
  const thread = new Thread([firstEvent]);

  const result = await agentLoop(thread);
  console.log(result);
}
================================================
FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-index.ts
================================================
import { cli } from "./cli"
// Prints a greeting. Not called from main() in this file.
async function hello(): Promise<void> {
  console.log('hello, world!')
}

// Program entry point: hands control to the command-line interface.
async function main() {
  await cli()
}

// Any rejection from main() is reported through console.error.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/README.md
================================================
# Chapter 2 - Add Calculator Tools
Let's add some calculator tools to our agent.
Let's start by adding a tool definition for the calculator
These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.
cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
show file
```rust
// ./walkthrough/02-tool_calculator.baml
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}
```
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps
```diff
baml_src/agent.baml
function DetermineNextStep(
thread: string
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
```
skip this step
cp ./walkthrough/02-agent.baml baml_src/agent.baml
Generate updated BAML client
npx baml-cli generate
Try out the calculator
npx tsx src/index.ts 'can you add 3 and 4'
You should see a tool call to the calculator
{
intent: 'add',
a: 3,
b: 4
}
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/baml_src/agent.baml
================================================
// The model's "answer the user" outcome.
// `intent` is a fixed literal; `message` is the text for the user.
class DoneForNow {
intent "done_for_now"
message string
}
// DetermineNextStep: one LLM call that decides what the agent does next.
// Input:  `thread` - the conversation so far, serialized to a string by the caller.
// Output: a DoneForNow object (the only return type declared here).
function DetermineNextStep(
thread: string
) -> DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Smoke test: feeds a single user_input event, written as JSON, to the function.
// It declares no @@assert lines.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// LLM clients. BAML declares these with `client<llm> Name { ... }`.
// The retry policies referenced here (Exponential, Constant) are defined further down in this file.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}

client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    // NOTE(review): both entries are the same client, so the "fallback" only
    // re-tries CustomGPT4oMini; confirm whether a different second client was intended.
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/src/agent.ts
================================================
import { b } from "../baml_client";
// What DetermineNextStep resolves to: either a tool call or a
// "respond to human" step.
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
/** One entry in a thread's event history. */
export interface Event {
  /** Kind of event, e.g. "user_input" */
  type: string;
  /** Payload of the event; deliberately untyped */
  data: any;
}
/** Ordered list of events that forms the context handed to the LLM. */
export class Thread {
  events: Event[] = [];

  /** Adopts the given array as the thread's history (no copy is made). */
  constructor(events: Event[]) {
    this.events = events;
  }

  /**
   * Renders the history as the string that is sent to the model.
   * Plain JSON for now; swap in any custom serialization (XML, etc.), e.g.
   * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
   */
  serializeForLLM() {
    const serialized = JSON.stringify(this.events);
    return serialized;
  }
}
// Runs a single turn with the LLM and returns whatever step it chose.
// This function will later be extended to hold all of the agent logic.
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
  return nextStep;
}
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
/**
 * Command-line entry point: turns the process arguments into a single user
 * message, runs the agent loop on it and prints the result.
 * Exits with status 1 when no message was supplied.
 */
export async function cli() {
  // argv[0] is the node binary and argv[1] the script; everything after is the message
  const [, , ...words] = process.argv;

  if (!words.length) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // The whole command line becomes one user_input event that seeds the thread
  const firstEvent = { type: "user_input", data: words.join(" ") };
  const thread = new Thread([firstEvent]);

  const outcome = await agentLoop(thread);
  console.log(outcome);
}
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/src/index.ts
================================================
import { cli } from "./cli"
// Prints a greeting. Not called anywhere in this file.
async function hello(): Promise<void> {
  console.log('hello, world!')
}

// Program entry point: hands control to the CLI.
async function main() {
  await cli()
}

// Any rejection from main() is reported on stderr via console.error.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/walkthrough/02-agent.baml
================================================
// Step the model returns when it wants to answer the user instead of calling a tool.
class DoneForNow {
// Literal tag identifying this kind of step
intent "done_for_now"
// Text of the reply
message string
}
// Asks the "openai/gpt-4o" client what the next step should be for a thread.
// `thread` is an already-serialized string; it is interpolated as-is into the user message.
// The model may answer with any of the CalculatorTools or with DoneForNow.
// {{ ctx.output_format }} is BAML's placeholder for the output-format instructions
// derived from the return type (see the BAML prompt docs).
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Smoke test: runs DetermineNextStep against a thread holding one "hello!" user input.
// No assertions are attached, so it only checks that the call completes and parses.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
================================================
FILE: workshops/2025-05/sections/02-calculator-tools/walkthrough/02-tool_calculator.baml
================================================
// Union of all calculator tool calls the model can choose from.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Every tool below has the same shape: a literal `intent` tag naming the
// operation plus two numeric operands `a` and `b` (each an int or a float).
// Addition request
class AddTool {
intent "add"
a int | float
b int | float
}
// Subtraction request
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// Multiplication request
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// Division request
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/03-tool-loop/README.md
================================================
# Chapter 3 - Process Tool Calls in a Loop
Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.
First, let's update the agent to handle the tool call
```diff
src/agent.ts
}
-// right now this just runs one turn with the LLM, but
-// we'll update this function to handle all the agent logic
-export async function agentLoop(thread: Thread): Promise<AgentResponse> {
- const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
- return nextStep;
+
+
+export async function agentLoop(thread: Thread): Promise<string> {
+
+ while (true) {
+ const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
+ console.log("nextStep", nextStep);
+
+ switch (nextStep.intent) {
+ case "done_for_now":
+ // response to human, return the next step object
+ return nextStep.message;
+ case "add":
+ thread.events.push({
+ "type": "tool_call",
+ "data": nextStep
+ });
+ const result = nextStep.a + nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ continue;
+ default:
+ throw new Error(`Unknown intent: ${nextStep.intent}`);
+ }
+ }
}
```
skip this step
cp ./walkthrough/03-agent.ts src/agent.ts
Now, let's try it out
npx tsx src/index.ts 'can you add 3 and 4'
you should see the agent call the tool and then return the result
{
intent: 'done_for_now',
message: 'The sum of 3 and 4 is 7.'
}
For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output
export BAML_LOG=off
Try a multi-step calculation
npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'
you'll notice that tools like multiply and divide are not available
npx tsx src/index.ts 'can you multiply 3 and 4'
next, let's add handlers for the rest of the calculator tools
```diff
src/agent.ts
-import { b } from "../baml_client";
+import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
-// tool call or a respond to human tool
-type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
-
export interface Event {
type: string
}
+export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
+export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
+ let result: number;
+ switch (nextStep.intent) {
+ case "add":
+ result = nextStep.a + nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "subtract":
+ result = nextStep.a - nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "multiply":
+ result = nextStep.a * nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "divide":
+ result = nextStep.a / nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ }
+}
export async function agentLoop(thread: Thread): Promise<string> {
console.log("nextStep", nextStep);
+ thread.events.push({
+ "type": "tool_call",
+ "data": nextStep
+ });
+
switch (nextStep.intent) {
case "done_for_now":
return nextStep.message;
case "add":
- thread.events.push({
- "type": "tool_call",
- "data": nextStep
- });
- const result = nextStep.a + nextStep.b;
- console.log("tool_response", result);
- thread.events.push({
- "type": "tool_response",
- "data": result
- });
- continue;
- default:
- throw new Error(`Unknown intent: ${nextStep.intent}`);
+ case "subtract":
+ case "multiply":
+ case "divide":
+ thread = await handleNextStep(nextStep, thread);
}
}
```
skip this step
cp ./walkthrough/03b-agent.ts src/agent.ts
Test subtraction
npx tsx src/index.ts 'can you subtract 3 from 4'
now, let's test the multiplication tool
npx tsx src/index.ts 'can you multiply 3 and 4'
finally, let's test a more complex calculation with multiple operations
npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
================================================
FILE: workshops/2025-05/sections/03-tool-loop/baml_src/agent.baml
================================================
class DoneForNow {
intent "done_for_now"
message string
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// LLM clients. BAML declares these with `client<llm> Name { ... }`.
// The retry policies referenced here (Exponential, Constant) are defined further down in this file.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}

client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    // NOTE(review): both entries are the same client, so the "fallback" only
    // re-tries CustomGPT4oMini; confirm whether a different second client was intended.
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/baml_src/tool_calculator.baml
================================================
// Union of all calculator tool calls the model can choose from.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Every tool below has the same shape: a literal `intent` tag naming the
// operation plus two numeric operands `a` and `b` (each an int or a float).
// Addition request
class AddTool {
intent "add"
a int | float
b int | float
}
// Subtraction request
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// Multiplication request
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// Division request
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/src/agent.ts
================================================
import { b } from "../baml_client";
// What DetermineNextStep resolves to: either a tool call or a
// "respond to human" step.
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
/** One entry in a thread's event history. */
export interface Event {
  /** Kind of event, e.g. "user_input" */
  type: string;
  /** Payload of the event; deliberately untyped */
  data: any;
}
/** Ordered list of events that forms the context handed to the LLM. */
export class Thread {
  events: Event[] = [];

  /** Adopts the given array as the thread's history (no copy is made). */
  constructor(events: Event[]) {
    this.events = events;
  }

  /**
   * Renders the history as the string that is sent to the model.
   * Plain JSON for now; swap in any custom serialization (XML, etc.), e.g.
   * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
   */
  serializeForLLM() {
    const serialized = JSON.stringify(this.events);
    return serialized;
  }
}
// Runs a single turn with the LLM and returns whatever step it chose.
// This function will later be extended to hold all of the agent logic.
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
  return nextStep;
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
/**
 * Command-line entry point: turns the process arguments into a single user
 * message, runs the agent loop on it and prints the result.
 * Exits with status 1 when no message was supplied.
 */
export async function cli() {
  // argv[0] is the node binary and argv[1] the script; everything after is the message
  const [, , ...words] = process.argv;

  if (!words.length) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // The whole command line becomes one user_input event that seeds the thread
  const firstEvent = { type: "user_input", data: words.join(" ") };
  const thread = new Thread([firstEvent]);

  const outcome = await agentLoop(thread);
  console.log(outcome);
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/src/index.ts
================================================
import { cli } from "./cli"
// Prints a greeting. Not called anywhere in this file.
async function hello(): Promise<void> {
  console.log('hello, world!')
}

// Program entry point: hands control to the CLI.
async function main() {
  await cli()
}

// Any rejection from main() is reported on stderr via console.error.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/03-tool-loop/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/walkthrough/03-agent.ts
================================================
import { b } from "../baml_client";
// What DetermineNextStep resolves to: either a tool call or a
// "respond to human" step.
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
/** One entry in a thread's event history. */
export interface Event {
  /** Kind of event, e.g. "user_input", "tool_call" or "tool_response" */
  type: string;
  /** Payload of the event; deliberately untyped */
  data: any;
}
/** Ordered list of events that forms the context handed to the LLM. */
export class Thread {
  events: Event[] = [];

  /** Adopts the given array as the thread's history (no copy is made). */
  constructor(events: Event[]) {
    this.events = events;
  }

  /**
   * Renders the history as the string that is sent to the model.
   * Plain JSON for now; swap in any custom serialization (XML, etc.), e.g.
   * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
   */
  serializeForLLM() {
    const serialized = JSON.stringify(this.events);
    return serialized;
  }
}
/**
 * Drives the agent until the model decides it is done.
 *
 * Each iteration asks the model for the next step. An "add" tool call is
 * executed locally and both the call and its result are appended to the
 * thread, so the next iteration sees them.
 *
 * @param thread conversation thread; mutated as tool calls are processed
 * @returns the message of the model's final "done_for_now" step
 * @throws Error when the model picks a tool that has no handler yet
 */
export async function agentLoop(thread: Thread): Promise<string> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    switch (nextStep.intent) {
      case "done_for_now":
        // response to human: hand back the message text
        return nextStep.message;
      case "add": {
        // braces give `result` its own scope instead of leaking into sibling cases
        thread.events.push({
          "type": "tool_call",
          "data": nextStep
        });
        const result = nextStep.a + nextStep.b;
        console.log("tool_response", result);
        thread.events.push({
          "type": "tool_response",
          "data": result
        });
        continue;
      }
      default:
        throw new Error(`Unknown intent: ${nextStep.intent}`);
    }
  }
}
================================================
FILE: workshops/2025-05/sections/03-tool-loop/walkthrough/03b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's event history. */
export interface Event {
  /** Kind of event, e.g. "user_input", "tool_call" or "tool_response" */
  type: string;
  /** Payload of the event; deliberately untyped */
  data: any;
}
/** Ordered list of events that forms the context handed to the LLM. */
export class Thread {
  events: Event[] = [];

  /** Adopts the given array as the thread's history (no copy is made). */
  constructor(events: Event[]) {
    this.events = events;
  }

  /**
   * Renders the history as the string that is sent to the model.
   * Plain JSON for now; swap in any custom serialization (XML, etc.), e.g.
   * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
   */
  serializeForLLM() {
    const serialized = JSON.stringify(this.events);
    return serialized;
  }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its outcome on the thread.
 *
 * @param nextStep tool call chosen by the model; `intent` selects the operation
 * @param thread   conversation thread; mutated by appending a "tool_response" event
 * @returns the same thread instance that was passed in
 * @throws Error if `intent` is not one of the four calculator operations
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      // NOTE: a zero divisor yields Infinity/NaN, which JSON.stringify renders as null
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Not reachable for well-typed callers; guards against unexpected runtime data
      // instead of silently returning undefined.
      throw new Error(`Unknown intent: ${(nextStep as { intent: string }).intent}`);
  }

  // Logging and bookkeeping are the same for every operation
  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Drives the agent until the model decides it is done.
 *
 * Each iteration asks the model for the next step, records that step on the
 * thread as a "tool_call" event, and either returns (done_for_now) or runs
 * the calculator tool via handleNextStep before looping again.
 *
 * @param thread conversation thread; mutated as steps are processed
 * @returns the message of the model's final "done_for_now" step
 */
export async function agentLoop(thread: Thread): Promise<string> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    // Every step is recorded, including the final done_for_now one
    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
        // response to human: hand back the message text
        return nextStep.message;
      case "add":
      case "subtract":
      case "multiply":
      case "divide":
        thread = await handleNextStep(nextStep, thread);
    }
  }
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/04-baml-tests/README.md
================================================
# Chapter 4 - Add Tests to agent.baml
Let's add some tests to our BAML agent.
to start, leave the baml logs enabled
export BAML_LOG=debug
next, let's add some tests to the agent
We'll start with a simple test that checks the agent's ability to handle
a basic calculation.
```diff
baml_src/agent.baml
"#
}
+
+test MathOperation {
+ functions [DetermineNextStep]
+ args {
+ thread #"
+ {
+ "type": "user_input",
+ "data": "can you multiply 3 and 4?"
+ }
+ "#
+ }
+}
+
```
skip this step
cp ./walkthrough/04-agent.baml baml_src/agent.baml
Run the tests
npx baml-cli test
now, let's improve the test with assertions!
Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.
```diff
baml_src/agent.baml
"#
}
+ @@assert(hello, {{this.intent == "done_for_now"}})
}
"#
}
+ @@assert(math_operation, {{this.intent == "multiply"}})
}
```
skip this step
cp ./walkthrough/04b-agent.baml baml_src/agent.baml
Run the tests
npx baml-cli test
as you add more tests, you can disable the logs to keep the output clean.
You may want to turn them on as you iterate on specific tests.
export BAML_LOG=off
now, let's add some more complex test cases,
where we resume from the middle of an in-progress
agentic context window
```diff
baml_src/agent.baml
"#
}
- @@assert(hello, {{this.intent == "done_for_now"}})
+ @@assert(intent, {{this.intent == "done_for_now"}})
}
"#
}
- @@assert(math_operation, {{this.intent == "multiply"}})
+ @@assert(intent, {{this.intent == "multiply"}})
}
+test LongMath {
+ functions [DetermineNextStep]
+ args {
+ thread #"
+ [
+ {
+ "type": "user_input",
+ "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
+ },
+ {
+ "type": "tool_call",
+ "data": {
+ "intent": "multiply",
+ "a": 3,
+ "b": 4
+ }
+ },
+ {
+ "type": "tool_response",
+ "data": 12
+ },
+ {
+ "type": "tool_call",
+ "data": {
+ "intent": "divide",
+ "a": 12,
+ "b": 2
+ }
+ },
+ {
+ "type": "tool_response",
+ "data": 6
+ },
+ {
+ "type": "tool_call",
+ "data": {
+ "intent": "add",
+ "a": 6,
+ "b": 12
+ }
+ },
+ {
+ "type": "tool_response",
+ "data": 18
+ }
+ ]
+ "#
+ }
+ @@assert(intent, {{this.intent == "done_for_now"}})
+ @@assert(answer, {{"18" in this.message}})
+}
+
```
skip this step
cp ./walkthrough/04c-agent.baml baml_src/agent.baml
let's try to run it
npx baml-cli test
================================================
FILE: workshops/2025-05/sections/04-baml-tests/baml_src/agent.baml
================================================
class DoneForNow {
intent "done_for_now"
message string
}
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// LLM clients. BAML declares these with `client<llm> Name { ... }`.
// The retry policies referenced here (Exponential, Constant) are defined further down in this file.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}

client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    // NOTE(review): both entries are the same client, so the "fallback" only
    // re-tries CustomGPT4oMini; confirm whether a different second client was intended.
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/baml_src/tool_calculator.baml
================================================
// Union of all calculator tool calls the model can choose from.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Every tool below has the same shape: a literal `intent` tag naming the
// operation plus two numeric operands `a` and `b` (each an int or a float).
// Addition request
class AddTool {
intent "add"
a int | float
b int | float
}
// Subtraction request
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// Multiplication request
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// Division request
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's event history. */
export interface Event {
  /** Kind of event, e.g. "user_input", "tool_call" or "tool_response" */
  type: string;
  /** Payload of the event; deliberately untyped */
  data: any;
}
/** Ordered list of events that forms the context handed to the LLM. */
export class Thread {
  events: Event[] = [];

  /** Adopts the given array as the thread's history (no copy is made). */
  constructor(events: Event[]) {
    this.events = events;
  }

  /**
   * Renders the history as the string that is sent to the model.
   * Plain JSON for now; swap in any custom serialization (XML, etc.), e.g.
   * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
   */
  serializeForLLM() {
    const serialized = JSON.stringify(this.events);
    return serialized;
  }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its outcome on the thread.
 *
 * @param nextStep tool call chosen by the model; `intent` selects the operation
 * @param thread   conversation thread; mutated by appending a "tool_response" event
 * @returns the same thread instance that was passed in
 * @throws Error if `intent` is not one of the four calculator operations
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      // NOTE: a zero divisor yields Infinity/NaN, which JSON.stringify renders as null
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Not reachable for well-typed callers; guards against unexpected runtime data
      // instead of silently returning undefined.
      throw new Error(`Unknown intent: ${(nextStep as { intent: string }).intent}`);
  }

  // Logging and bookkeeping are the same for every operation
  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Drives the agent until the model decides it is done.
 *
 * Each iteration asks the model for the next step, records that step on the
 * thread as a "tool_call" event, and either returns (done_for_now) or runs
 * the calculator tool via handleNextStep before looping again.
 *
 * @param thread conversation thread; mutated as steps are processed
 * @returns the message of the model's final "done_for_now" step
 */
export async function agentLoop(thread: Thread): Promise<string> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    // Every step is recorded, including the final done_for_now one
    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
        // response to human: hand back the message text
        return nextStep.message;
      case "add":
      case "subtract":
      case "multiply":
      case "divide":
        thread = await handleNextStep(nextStep, thread);
    }
  }
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
/**
 * Command-line entry point: turns the process arguments into a single user
 * message, runs the agent loop on it and prints the result.
 * Exits with status 1 when no message was supplied.
 */
export async function cli() {
  // argv[0] is the node binary and argv[1] the script; everything after is the message
  const [, , ...words] = process.argv;

  if (!words.length) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // The whole command line becomes one user_input event that seeds the thread
  const firstEvent = { type: "user_input", data: words.join(" ") };
  const thread = new Thread([firstEvent]);

  const outcome = await agentLoop(thread);
  console.log(outcome);
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/src/index.ts
================================================
import { cli } from "./cli"
// Prints a greeting. Not called anywhere in this file.
async function hello(): Promise<void> {
  console.log('hello, world!')
}

// Program entry point: hands control to the CLI.
async function main() {
  await cli()
}

// Any rejection from main() is reported on stderr via console.error.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/04-baml-tests/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/walkthrough/04-agent.baml
================================================
// One of the possible results of DetermineNextStep.
// `intent` is a literal tag fixed to "done_for_now"; `message` is free text.
class DoneForNow {
intent "done_for_now"
message string
}
// Asks the model for the next step given the thread, passed in as a string.
// The result is one of the CalculatorTools variants (declared outside this block) or DoneForNow.
// Uses the shorthand client string "openai/gpt-4o".
// Prompt layout: a system message, then a user message that embeds the thread
// and ends with the output-format instructions from ctx.output_format.
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Calls DetermineNextStep with a single user_input event containing a greeting.
// There are no @@assert lines, so nothing about the chosen intent is checked.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
// Calls DetermineNextStep with a single user_input event asking for a multiplication.
// There are no @@assert lines, so nothing about the chosen intent is checked.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/walkthrough/04b-agent.baml
================================================
// One of the possible results of DetermineNextStep.
// `intent` is a literal tag fixed to "done_for_now"; `message` is free text.
class DoneForNow {
intent "done_for_now"
message string
}
// Asks the model for the next step given the thread, passed in as a string.
// The result is one of the CalculatorTools variants (declared outside this block) or DoneForNow.
// Uses the shorthand client string "openai/gpt-4o".
// Prompt layout: a system message, then a user message that embeds the thread
// and ends with the output-format instructions from ctx.output_format.
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Greeting-only thread; the assertion labelled `hello` requires the result's
// intent to be "done_for_now".
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(hello, {{this.intent == "done_for_now"}})
}
// Multiplication request; the assertion labelled `math_operation` requires the
// result's intent to be "multiply".
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(math_operation, {{this.intent == "multiply"}})
}
================================================
FILE: workshops/2025-05/sections/04-baml-tests/walkthrough/04c-agent.baml
================================================
// One of the possible results of DetermineNextStep.
// `intent` is a literal tag fixed to "done_for_now"; `message` is free text.
class DoneForNow {
intent "done_for_now"
message string
}
// Asks the model for the next step given the thread, passed in as a string.
// The result is one of the CalculatorTools variants (declared outside this block) or DoneForNow.
// Uses the shorthand client string "openai/gpt-4o".
// Prompt layout: a system message, then a user message that embeds the thread
// and ends with the output-format instructions from ctx.output_format.
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Greeting-only thread; requires the result's intent to be "done_for_now".
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
// Multiplication request; requires the result's intent to be "multiply".
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Replays a thread in which all three requested operations already have
// tool_call/tool_response pairs (multiply -> 12, divide -> 6, add -> 18).
// Requires the next step to be "done_for_now" with "18" somewhere in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/05-human-tools/README.md
================================================
# Chapter 5 - Multiple Human Tools
In this section, we'll add support for multiple tools that serve to
contact humans.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
first, let's add a tool that can request clarification from a human
this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.
```diff
baml_src/agent.baml
+// human tools are async requests to a human
+type HumanTools = ClarificationRequest | DoneForNow
+
+class ClarificationRequest {
+ intent "request_more_information" @description("you can request more information from me")
+ message string
+}
+
class DoneForNow {
intent "done_for_now"
- message string
+
+ message string @description(#"
+ message to send to the user about the work that was done.
+ "#)
}
function DetermineNextStep(
thread: string
-) -> CalculatorTools | DoneForNow {
+) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
}
+
```
skip this step
cp ./walkthrough/05-agent.baml baml_src/agent.baml
next, let's re-generate the client code
NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.
npx baml-cli generate
now, let's update the agent to use the new tool
```diff
src/agent.ts
}
-export async function agentLoop(thread: Thread): Promise<string> {
+export async function agentLoop(thread: Thread): Promise<Thread> {
while (true) {
switch (nextStep.intent) {
case "done_for_now":
- // response to human, return the next step object
- return nextStep.message;
+ case "request_more_information":
+ // response to human, return the thread
+ return thread;
case "add":
case "subtract":
```
skip this step
cp ./walkthrough/05-agent.ts src/agent.ts
next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI
```diff
src/cli.ts
// cli.ts lets you invoke the agent loop from the command line
-import { agentLoop, Thread, Event } from "./agent";
+import { agentLoop, Thread, Event } from "../src/agent";
+
+
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
// Run the agent loop with the thread
const result = await agentLoop(thread);
- console.log(result);
+ let lastEvent = result.events.slice(-1)[0];
+
+ while (lastEvent.data.intent === "request_more_information") {
+ const message = await askHuman(lastEvent.data.message);
+ thread.events.push({ type: "human_response", data: message });
+ const result = await agentLoop(thread);
+ lastEvent = result.events.slice(-1)[0];
+ }
+
+ // print the final result
+ // optional - you could loop here too
+ console.log(lastEvent.data.message);
+ process.exit(0);
}
+
+async function askHuman(message: string) {
+ const readline = require('readline').createInterface({
+ input: process.stdin,
+ output: process.stdout
+ });
+
+ return new Promise((resolve) => {
+ readline.question(`${message}\n> `, (answer: string) => {
+ resolve(answer);
+ });
+ });
+}
```
skip this step
cp ./walkthrough/05-cli.ts src/cli.ts
let's try it out
npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '
next, let's add a test that checks the agent's ability to handle
a clarification request
```diff
baml_src/agent.baml
+
+test MathOperationWithClarification {
+ functions [DetermineNextStep]
+ args {
+ thread #"
+ [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+ "#
+ }
+ @@assert(intent, {{this.intent == "request_more_information"}})
+}
+
+test MathOperationPostClarification {
+ functions [DetermineNextStep]
+ args {
+ thread #"
+ [
+ {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
+ {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
+ {"type":"human_response","data":"lets try 12 instead"},
+ ]
+ "#
+ }
+ @@assert(intent, {{this.intent == "multiply"}})
+ @@assert(b, {{this.b == 12}})
+ @@assert(a, {{this.a == 3}})
+}
+
+
+
```
skip this step
cp ./walkthrough/05b-agent.baml baml_src/agent.baml
and now we can run the tests again
npx baml-cli test
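you'll notice the new tests pass, but the hello world test fails.
This is because the test still expects "done_for_now", while the agent, now that it has a clarification tool, typically answers a bare greeting with "request_more_information". Update the assertion to match: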
```diff
baml_src/agent.baml
"#
}
- @@assert(intent, {{this.intent == "done_for_now"}})
+ @@assert(intent, {{this.intent == "request_more_information"}})
}
```
skip this step
cp ./walkthrough/05c-agent.baml baml_src/agent.baml
Verify tests pass
npx baml-cli test
================================================
FILE: workshops/2025-05/sections/05-human-tools/baml_src/agent.baml
================================================
// One of the possible results of DetermineNextStep.
// `intent` is a literal tag fixed to "done_for_now"; `message` is free text.
class DoneForNow {
intent "done_for_now"
message string
}
// Asks the model for the next step given the thread, passed in as a string.
// The result is one of the CalculatorTools variants (see tool_calculator.baml) or DoneForNow.
// Uses the shorthand client string "openai/gpt-4o" rather than a named client from clients.baml.
// Prompt layout: a system message, then a user message that embeds the thread
// and ends with the output-format instructions from ctx.output_format.
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Greeting-only thread; requires the result's intent to be "done_for_now".
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
// Multiplication request; requires the result's intent to be "multiply".
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Replays a thread in which all three requested operations already have
// tool_call/tool_response pairs (multiply -> 12, divide -> 6, add -> 18).
// Requires the next step to be "done_for_now" with "18" somewhere in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// OpenAI client for the "gpt-4o" model; the API key is read from OPENAI_API_KEY.
// Declared with the documented `client<llm>` keyword.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}
// OpenAI client for the "gpt-4o-mini" model; the API key is read from OPENAI_API_KEY.
// Retries follow the `Exponential` retry_policy declared in this file.
// Declared with the documented `client<llm>` keyword.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}
// Anthropic client for "claude-3-5-sonnet-20241022"; the API key is read from ANTHROPIC_API_KEY.
// Declared with the documented `client<llm>` keyword.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}
// Anthropic client for "claude-3-haiku-20240307"; the API key is read from ANTHROPIC_API_KEY.
// Retries follow the `Constant` retry_policy declared in this file.
// Declared with the documented `client<llm>` keyword.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that distributes calls across the listed clients.
// Declared with the documented `client<llm>` keyword.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that walks the strategy list in order.
// Declared with the documented `client<llm>` keyword.
// NOTE(review): both strategy entries name the same client, so the "fallback"
// only re-tries CustomGPT4oMini; confirm whether a different second client was intended.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy referenced by CustomHaiku: at most 3 retries, using the
// constant_delay strategy with delay_ms 200.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Retry policy referenced by CustomGPT4oMini: at most 2 retries, using the
// exponential_backoff strategy (delay_ms 300, multiplier 1.5, max_delay_ms 10000).
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/baml_src/generators.baml
================================================
// This helps use auto generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
// This generator emits an async TypeScript client one directory above baml_src/.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
// NOTE(review): this section's package.json requests "@boundaryml/baml": "latest";
// confirm the installed package actually matches the version pinned here.
version "0.202.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tool calls the model may choose between.
// Every member has the same shape: a literal `intent` tag plus two numeric
// operands `a` and `b`, each accepted as int or float.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// intent "add": operands a and b
class AddTool {
intent "add"
a int | float
b int | float
}
// intent "subtract": operands a and b
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// intent "multiply": operands a and b
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// intent "divide": operands a and b
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"@boundaryml/baml": "latest",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
// One entry in a Thread's history.
export interface Event {
// Kind of entry; this file pushes "tool_call" and "tool_response".
type: string
// Payload for the entry; untyped. This file stores the model's next step for
// "tool_call" and a number for "tool_response".
data: any;
}
// Ordered list of events exchanged so far, plus the serialization sent to the model.
export class Thread {
    events: Event[] = [];

    // Adopts the given array as-is (no copy is made).
    constructor(events: Event[]) {
        this.events = events;
    }

    // Renders the event list as a JSON string for the prompt.
    // Swap in any custom format here (XML, etc.), for example:
    // https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    serializeForLLM() {
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}
// Union of the four generated calculator tool types; this is what handleNextStep accepts.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
// Executes one calculator tool call and records its result on the thread.
//
// nextStep: the tool call chosen by the model; `intent` selects the operation
//           applied to `a` and `b`.
// thread:   the conversation thread; a "tool_response" event carrying the
//           numeric result is appended to it in place.
// Returns the same thread instance after the event has been appended.
// Throws an Error if `intent` is not one of the four calculator intents.
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // No zero check: the quotient follows plain JS number semantics.
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed input. Failing loudly here beats silently
            // returning undefined and breaking the caller's `thread` variable.
            throw new Error(`unsupported calculator intent: ${JSON.stringify(nextStep)}`);
    }

    // Logging and recording are identical for every operation
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
// Drives the agent: repeatedly asks the model for the next step, records it on
// the thread as a "tool_call" event, and either finishes or runs the tool.
//
// thread: the conversation so far; it is mutated as events are appended.
// Returns the model's final message once it answers with "done_for_now".
export async function agentLoop(thread: Thread): Promise<string> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        // Record the model's decision before acting on it
        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
                // response to human, return the final message
                return nextStep.message;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
// Command-line entry: turns the process arguments into a single user message,
// runs the agent loop on it and prints whatever the loop returns.
export async function cli() {
    // Drop the first two argv entries (node binary and script path)
    const [, , ...words] = process.argv;

    if (!words.length) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // All remaining arguments form one space-separated message
    const userMessage = words.join(" ");

    // Seed a thread with that message as its only event, then run the loop
    const seedEvent = { type: "user_input", data: userMessage };
    const output = await agentLoop(new Thread([seedEvent]));

    console.log(output);
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/src/index.ts
================================================
import { cli } from "./cli"
// Prints a fixed greeting to stdout and resolves with no value.
// Not exported, and `main` below does not call it.
async function hello(): Promise<void> {
    console.log('hello, world!')
}
// Program entry point: hands control to the CLI and waits for it to finish.
async function main() {
    await cli();
}

// Start the program; a rejection from main is reported through console.error.
main().catch((err) => console.error(err));
================================================
FILE: workshops/2025-05/sections/05-human-tools/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05-agent.baml
================================================
// human tools are async requests to a human
// Union of the two result variants that address the human rather than a calculator.
type HumanTools = ClarificationRequest | DoneForNow
// Variant tagged "request_more_information"; the @description text on `intent`
// is attached to that field, and `message` is free text.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant tagged "done_for_now"; `message` carries a @description of what to put in it.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Asks the model for the next step given the thread, passed in as a string.
// The result is one of the HumanTools variants or one of the CalculatorTools variants.
// Uses the shorthand client string "openai/gpt-4o".
// Prompt layout: a system message, then a user message that embeds the thread
// and ends with the output-format instructions from ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Greeting-only thread; requires the result's intent to be "done_for_now".
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
// Multiplication request; requires the result's intent to be "multiply".
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Replays a thread in which all three requested operations already have
// tool_call/tool_response pairs (multiply -> 12, divide -> 6, add -> 18).
// Requires the next step to be "done_for_now" with "18" somewhere in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
// One entry in a Thread's history.
export interface Event {
// Kind of entry; this file pushes "tool_call" and "tool_response".
type: string
// Payload for the entry; untyped. This file stores the model's next step for
// "tool_call" and a number for "tool_response".
data: any;
}
// Ordered list of events exchanged so far, plus the serialization sent to the model.
export class Thread {
    events: Event[] = [];

    // Adopts the given array as-is (no copy is made).
    constructor(events: Event[]) {
        this.events = events;
    }

    // Renders the event list as a JSON string for the prompt.
    // Swap in any custom format here (XML, etc.), for example:
    // https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
    serializeForLLM() {
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}
// Union of the four generated calculator tool types; this is what handleNextStep accepts.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
// Executes one calculator tool call and records its result on the thread.
//
// nextStep: the tool call chosen by the model; `intent` selects the operation
//           applied to `a` and `b`.
// thread:   the conversation thread; a "tool_response" event carrying the
//           numeric result is appended to it in place.
// Returns the same thread instance after the event has been appended.
// Throws an Error if `intent` is not one of the four calculator intents.
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // No zero check: the quotient follows plain JS number semantics.
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed input. Failing loudly here beats silently
            // returning undefined and breaking the caller's `thread` variable.
            throw new Error(`unsupported calculator intent: ${JSON.stringify(nextStep)}`);
    }

    // Logging and recording are identical for every operation
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
// Drives the agent: repeatedly asks the model for the next step, records it on
// the thread as a "tool_call" event, and either hands control back to the
// human or runs the requested calculator tool.
//
// thread: the conversation so far; it is mutated as events are appended.
// Returns the thread once the model answers with "done_for_now" or
// "request_more_information"; that answer is the thread's last event.
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        // Record the model's decision before acting on it
        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "../src/agent";
// Command-line entry: runs the agent on the message given as arguments and,
// whenever the agent asks for clarification, prompts the user and resumes.
export async function cli() {
    // Drop the first two argv entries (node binary and script path)
    const [, , ...words] = process.argv;

    if (!words.length) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // All remaining arguments form one space-separated message,
    // which becomes the thread's initial event
    const userMessage = words.join(" ");
    const thread = new Thread([{ type: "user_input", data: userMessage }]);

    // First pass through the agent loop
    let finished = await agentLoop(thread);
    let lastEvent = finished.events[finished.events.length - 1];

    // Keep answering clarification requests until the agent stops asking
    while (lastEvent.data.intent === "request_more_information") {
        const reply = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: reply });
        finished = await agentLoop(thread);
        lastEvent = finished.events[finished.events.length - 1];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}
// Prints `message` followed by a "> " prompt on stdout and resolves with the
// line the user types on stdin.
//
// A fresh readline interface is created per call and closed as soon as the
// answer arrives, so repeated questions do not pile up listeners on stdin.
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Release stdin before handing the answer back
            rl.close();
            resolve(answer);
        });
    });
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05b-agent.baml
================================================
// human tools are async requests to a human
// Union of the two result variants that address the human rather than a calculator.
type HumanTools = ClarificationRequest | DoneForNow
// Variant tagged "request_more_information"; the @description text on `intent`
// is attached to that field, and `message` is free text.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant tagged "done_for_now"; `message` carries a @description of what to put in it.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Asks the model for the next step given the thread, passed in as a string.
// The result is one of the HumanTools variants or one of the CalculatorTools variants.
// Uses the shorthand client string "openai/gpt-4o".
// Prompt layout: a system message, then a user message that embeds the thread
// and ends with the output-format instructions from ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Greeting-only thread; requires the result's intent to be "done_for_now".
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
// Multiplication request; requires the result's intent to be "multiply".
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Replays a thread in which all three requested operations already have
// tool_call/tool_response pairs (multiply -> 12, divide -> 6, add -> 18).
// Requires the next step to be "done_for_now" with "18" somewhere in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// The second operand in the request is not a number; requires the model to
// answer with "request_more_information" instead of a calculator call.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Replays a thread where the agent already asked for clarification and the
// human supplied 12 as the replacement operand. Requires a "multiply" call
// with a == 3 and b == 12.
// Each assertion label names the field it checks.
// NOTE(review): the fixture array ends with a trailing comma, which the
// JSON.stringify output used at runtime would not contain; left as-is because
// the string is part of the prompt sent to the model.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
[
{"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
{"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
{"type":"human_response","data":"lets try 12 instead"},
]
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(b, {{this.b == 12}})
@@assert(a, {{this.a == 3}})
}
================================================
FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05c-agent.baml
================================================
// human tools are async requests to a human
// Union of the two result variants that address the human rather than a calculator.
type HumanTools = ClarificationRequest | DoneForNow
// Variant tagged "request_more_information"; the @description text on `intent`
// is attached to that field, and `message` is free text.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant tagged "done_for_now"; `message` carries a @description of what to put in it.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Asks the model for the next step given the thread, passed in as a string.
// The result is one of the HumanTools variants or one of the CalculatorTools variants.
// Uses the shorthand client string "openai/gpt-4o".
// Prompt layout: a system message, then a user message that embeds the thread
// and ends with the output-format instructions from ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Greeting-only thread; in this revision the expected intent is
// "request_more_information" rather than "done_for_now".
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Multiplication request; requires the result's intent to be "multiply".
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Replays a thread in which all three requested operations already have
// tool_call/tool_response pairs (multiply -> 12, divide -> 6, add -> 18).
// Requires the next step to be "done_for_now" with "18" somewhere in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// The second operand in the request is not a number; requires the model to
// answer with "request_more_information" instead of a calculator call.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Replays a thread where the agent already asked for clarification and the
// human supplied 12 as the replacement operand. Requires a "multiply" call
// with a == 3 and b == 12.
// Each assertion label names the field it checks.
// NOTE(review): the fixture array ends with a trailing comma, which the
// JSON.stringify output used at runtime would not contain; left as-is because
// the string is part of the prompt sent to the model.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
[
{"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
{"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
{"type":"human_response","data":"lets try 12 instead"},
]
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(b, {{this.b == 12}})
@@assert(a, {{this.a == 3}})
}
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/README.md
================================================
# Chapter 6 - Customize Your Prompt with Reasoning
In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.
this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)
there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)
for this section, it will be helpful to leave the baml logs enabled
export BAML_LOG=debug
update the agent prompt to include a reasoning step
```diff
baml_src/agent.baml
{{ ctx.output_format }}
+
+ First, always plan out what to do next, for example:
+
+ - ...
+ - ...
+ - ...
+
+ {...} // schema
"#
}
@@assert(a, {{this.a == 3}})
}
-
-
```
skip this step
cp ./walkthrough/06-agent.baml baml_src/agent.baml
generate the updated client
npx baml-cli generate
now, you can try it out with a simple prompt
npx tsx src/index.ts 'can you multiply 3 and 4'
you should see output from the baml logs showing the reasoning steps
#### optional challenge
add a field to your tool output format that includes the reasoning steps in the output!
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/agent.baml
================================================
// human tools are async requests to a human
// Union of the two result variants that address the human rather than a calculator.
type HumanTools = ClarificationRequest | DoneForNow
// Variant tagged "request_more_information"; the @description text on `intent`
// is attached to that field, and `message` is free text.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant tagged "done_for_now"; `message` carries a @description of what to put in it.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Asks the model for the next step given the thread, passed in as a string.
// The result is one of the HumanTools variants or one of the CalculatorTools variants.
// Uses the shorthand client string "openai/gpt-4o" rather than a named client from clients.baml.
// Prompt layout: a system message, then a user message that embeds the thread
// and ends with the output-format instructions from ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Greeting-only thread; requires the result's intent to be "request_more_information".
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Multiplication request; requires the result's intent to be "multiply".
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Replays a thread in which all three requested operations already have
// tool_call/tool_response pairs (multiply -> 12, divide -> 6, add -> 18).
// Requires the next step to be "done_for_now" with "18" somewhere in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// The second operand in the request is not a number; requires the model to
// answer with "request_more_information" instead of a calculator call.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Replays a thread where the agent already asked for clarification and the
// human supplied 12 as the replacement operand. Requires a "multiply" call
// with a == 3 and b == 12.
// Each assertion label names the field it checks.
// NOTE(review): the fixture array ends with a trailing comma, which the
// JSON.stringify output used at runtime would not contain; left as-is because
// the string is part of the prompt sent to the model.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
[
{"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
{"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
{"type":"human_response","data":"lets try 12 instead"},
]
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(b, {{this.b == 12}})
@@assert(a, {{this.a == 3}})
}
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// OpenAI gpt-4o with no retry policy attached.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}
// OpenAI gpt-4o-mini, retried according to the `Exponential` retry policy.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}
// Anthropic Claude 3.5 Sonnet with no retry policy attached.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}
// Anthropic Claude 3 Haiku, retried according to the `Constant` retry policy.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two small models.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that tries each entry of `strategy` in order.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    // NOTE(review): both entries are the same client, so a failure falls back
    // to CustomGPT4oMini again rather than to a different model - confirm
    // whether a second, distinct client was intended here.
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Fixed-delay policy: at most 3 retries with a 200 ms delay setting.
// Referenced by CustomHaiku.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Backoff policy: at most 2 retries; the delay setting starts at 300 ms, uses
// a 1.5 multiplier and is capped at 10000 ms. Referenced by CustomGPT4oMini.
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
// "../" resolves to the directory that contains baml_src/.
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tool calls the model may choose from.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Each tool class carries a literal `intent` that identifies the operation
// plus two numeric operands, `a` and `b`.
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's history. */
export interface Event {
  /** Kind of entry, e.g. "user_input", "tool_call" or "tool_response". */
  type: string;
  /** Payload of the entry; its shape varies by `type`. */
  data: any;
}

/** An ordered list of events that is serialized and handed to the LLM. */
export class Thread {
  events: Event[];

  constructor(events: Event[]) {
    this.events = events;
  }

  /**
   * Render the event list as the string passed to the model.
   *
   * This is the hook for any custom serialization (XML, etc.), e.g.
   * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
   */
  serializeForLLM() {
    const serialized = JSON.stringify(this.events);
    return serialized;
  }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Run one calculator tool call and append its result to the thread.
 *
 * @param nextStep - tool call chosen by the model; `intent` selects the operation applied to `a` and `b`
 * @param thread - thread to record the result on (mutated in place)
 * @returns the same thread, with a `tool_response` event appended
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Unreachable for well-typed callers; fail loudly instead of resolving
      // to undefined when a malformed step slips through at runtime.
      throw new Error(`unsupported calculator intent: ${(nextStep as { intent: string }).intent}`);
  }
  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Drive the agent until it needs a human.
 *
 * Each iteration asks the model for the next step, records that step on the
 * thread as a `tool_call` event, and then either executes a calculator tool
 * and loops again or returns.
 *
 * @param thread - thread to continue; events are appended to it
 * @returns the thread once the model answers with `done_for_now` or
 *          `request_more_information`; the last event is that step
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
      case "request_more_information":
        // response to human, return the thread
        return thread;
      case "add":
      case "subtract":
      case "multiply":
      case "divide":
        thread = await handleNextStep(nextStep, thread);
        break;
    }
  }
}
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point.
 *
 * Joins the command-line arguments into one user message, runs the agent
 * loop, keeps prompting on stdin while the agent asks for more information,
 * then prints the final message and exits.
 */
export async function cli() {
  // Everything after `node <script>` is the user's message.
  const userArgs = process.argv.slice(2);
  if (userArgs.length === 0) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // Seed the thread with the user's message as its first event.
  const thread = new Thread([{ type: "user_input", data: userArgs.join(" ") }]);

  let lastEvent = (await agentLoop(thread)).events.slice(-1)[0];

  // Keep answering clarification requests until the agent stops asking.
  while (lastEvent.data.intent === "request_more_information") {
    const answer = await askHuman(lastEvent.data.message);
    thread.events.push({ type: "human_response", data: answer });
    lastEvent = (await agentLoop(thread)).events.slice(-1)[0];
  }

  // print the final result
  // optional - you could loop here too
  console.log(lastEvent.data.message);
  process.exit(0);
}
/**
 * Print `message` as a prompt on stdout and resolve with the line typed on stdin.
 *
 * The readline interface is closed as soon as the answer arrives, so repeated
 * calls (one per clarification round) do not accumulate open interfaces on
 * the same stdin stream.
 *
 * @param message - text to show before the `> ` prompt
 * @returns the line the user entered
 */
async function askHuman(message: string): Promise<string> {
  const rl = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise<string>((resolve) => {
    rl.question(`${message}\n> `, (answer: string) => {
      rl.close();
      resolve(answer);
    });
  });
}
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/src/index.ts
================================================
import { cli } from "./cli"
/** Prints a greeting. Not called anywhere in this file. */
async function hello(): Promise<void> {
  console.log('hello, world!')
}

/** Entry point: hand control to the command-line interface. */
async function main() {
  await cli()
}

// Log any rejection from main() to stderr.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/06-customize-prompt/walkthrough/06-agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the steps addressed to a person rather than to a
// calculator tool.
type HumanTools = ClarificationRequest | DoneForNow
// Step chosen when the model needs more input; `message` carries the question.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Step chosen when the model reports on the work that was done.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Ask the model for the next step given the serialized thread.
// Returns either a human-facing step (HumanTools) or a calculator tool call.
// The text after {{ ctx.output_format }} instructs the model to write out a
// short plan before the structured answer.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
First, always plan out what to do next, for example:
- ...
- ...
- ...
{...} // schema
"#
}
// Smoke test: the thread holds only a greeting, and the assertion expects the
// model to respond by asking the user for more information.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Single-step request: the assertion expects the model to choose the
// "multiply" tool for a plain multiplication question.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Multi-step request replayed with every tool call and tool response already
// in the thread. The assertions expect the model to finish ("done_for_now")
// and to mention the final value 18 in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// One operand is garbage text, so the assertion expects the model to ask for
// clarification instead of calling a calculator tool.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Replays a thread in which the model already asked for clarification and the
// human answered. The assertions expect the model to proceed with the
// multiplication, using the original 3 and the corrected 12.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    // No trailing comma after the last event: the agent builds this string
    // with JSON.stringify, which never emits one.
    thread #"
      [
      {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
      {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
      {"type":"human_response","data":"lets try 12 instead"}
      ]
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // Each label names the field it checks, so a failure report points at the right operand.
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/sections/07-context-window/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/07-context-window/README.md
================================================
# Chapter 7 - Customize Your Context Window
In this section, we'll explore how to customize the context window
of the agent.
This is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md).
First, update the agent to pretty-print the context window for the model:
```diff
src/agent.ts
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
- return JSON.stringify(this.events);
+ return JSON.stringify(this.events, null, 2);
}
}
```
skip this step
cp ./walkthrough/07-agent.ts src/agent.ts
Test the formatting
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
Next, let's update the agent to serialize the thread with XML-style tags instead.
This is a very popular format for passing data to a model,
among other things, because of the token efficiency of XML.
```diff
src/agent.ts
serializeForLLM() {
- // can change this to whatever custom serialization you want to do, XML, etc
- // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
- return JSON.stringify(this.events, null, 2);
+ return this.events.map(e => this.serializeOneEvent(e)).join("\n");
}
+
+ trimLeadingWhitespace(s: string) {
+ return s.replace(/^[ \t]+/gm, '');
+ }
+
+ serializeOneEvent(e: Event) {
+ return this.trimLeadingWhitespace(`
+ <${e.data?.intent || e.type}>
+ ${
+ typeof e.data !== 'object' ? e.data :
+ Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")}
+ </${e.data?.intent || e.type}>
+ `)
+ }
}
```
skip this step
cp ./walkthrough/07b-agent.ts src/agent.ts
let's try it out
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
Let's update our tests to match the new thread serialization format:
```diff
baml_src/agent.baml
{{ ctx.output_format }}
- First, always plan out what to do next, for example:
+ Always think about what to do next first, like:
- ...
args {
thread #"
- {
- "type": "user_input",
- "data": "hello!"
- }
+ <user_input>
+ hello!
+ </user_input>
"#
}
args {
thread #"
- {
- "type": "user_input",
- "data": "can you multiply 3 and 4?"
- }
+ <user_input>
+ can you multiply 3 and 4?
+ </user_input>
"#
}
args {
thread #"
- [
- {
- "type": "user_input",
- "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
- },
- {
- "type": "tool_call",
- "data": {
- "intent": "multiply",
- "a": 3,
- "b": 4
- }
- },
- {
- "type": "tool_response",
- "data": 12
- },
- {
- "type": "tool_call",
- "data": {
- "intent": "divide",
- "a": 12,
- "b": 2
- }
- },
- {
- "type": "tool_response",
- "data": 6
- },
- {
- "type": "tool_call",
- "data": {
- "intent": "add",
- "a": 6,
- "b": 12
- }
- },
- {
- "type": "tool_response",
- "data": 18
- }
- ]
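+ <user_input>
+ can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
+ </user_input>
+
+
+ <multiply>
+ a: 3
+ b: 4
+ </multiply>
+
+
+ <tool_response>
+ 12
+ </tool_response>
+
+
+ <divide>
+ a: 12
+ b: 2
+ </divide>
+
+
+ <tool_response>
+ 6
+ </tool_response>
+
+
+ <add>
+ a: 6
+ b: 12
+ </add>
+
+
+ <tool_response>
+ 18
+ </tool_response>
+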
"#
}
args {
thread #"
- [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+ <user_input>
+ can you multiply 3 and fe1iiaff10
+ </user_input>
"#
}
args {
thread #"
- [
- {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
- {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
- {"type":"human_response","data":"lets try 12 instead"},
- ]
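+ <user_input>
+ can you multiply 3 and FD*(#F&& ?
+ </user_input>
+
+ <request_more_information>
+ message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
+ </request_more_information>
+
+ <human_response>
+ lets try 12 instead
+ </human_response>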
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
```
skip this step
cp ./walkthrough/07c-agent.baml baml_src/agent.baml
check out the updated tests
npx baml-cli test
================================================
FILE: workshops/2025-05/sections/07-context-window/baml_src/agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the steps addressed to a person rather than to a
// calculator tool.
type HumanTools = ClarificationRequest | DoneForNow
// Step chosen when the model needs more input; `message` carries the question.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Step chosen when the model reports on the work that was done.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Ask the model for the next step given the serialized thread.
// Returns either a human-facing step (HumanTools) or a calculator tool call.
// The text after {{ ctx.output_format }} instructs the model to write out a
// short plan before the structured answer.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
First, always plan out what to do next, for example:
- ...
- ...
- ...
{...} // schema
"#
}
// Smoke test: the thread holds only a greeting, and the assertion expects the
// model to respond by asking the user for more information.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Single-step request: the assertion expects the model to choose the
// "multiply" tool for a plain multiplication question.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Multi-step request replayed with every tool call and tool response already
// in the thread. The assertions expect the model to finish ("done_for_now")
// and to mention the final value 18 in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// One operand is garbage text, so the assertion expects the model to ask for
// clarification instead of calling a calculator tool.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Replays a thread in which the model already asked for clarification and the
// human answered. The assertions expect the model to proceed with the
// multiplication, using the original 3 and the corrected 12.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    // No trailing comma after the last event: the agent builds this string
    // with JSON.stringify, which never emits one.
    thread #"
      [
      {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
      {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
      {"type":"human_response","data":"lets try 12 instead"}
      ]
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // Each label names the field it checks, so a failure report points at the right operand.
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/sections/07-context-window/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// OpenAI gpt-4o with no retry policy attached.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}
// OpenAI gpt-4o-mini, retried according to the `Exponential` retry policy.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}
// Anthropic Claude 3.5 Sonnet with no retry policy attached.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}
// Anthropic Claude 3 Haiku, retried according to the `Constant` retry policy.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads requests across the two small models.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that tries each entry of `strategy` in order.
// The declaration uses the `client<llm>` form that BAML requires.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    // NOTE(review): both entries are the same client, so a failure falls back
    // to CustomGPT4oMini again rather than to a different model - confirm
    // whether a second, distinct client was intended here.
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Fixed-delay policy: at most 3 retries with a 200 ms delay setting.
// Referenced by CustomHaiku.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Backoff policy: at most 2 retries; the delay setting starts at 300 ms, uses
// a 1.5 multiplier and is capped at 10000 ms. Referenced by CustomGPT4oMini.
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/07-context-window/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
// "../" resolves to the directory that contains baml_src/.
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/07-context-window/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tool calls the model may choose from.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Each tool class carries a literal `intent` that identifies the operation
// plus two numeric operands, `a` and `b`.
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/07-context-window/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/sections/07-context-window/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's history. */
export interface Event {
  /** Kind of entry, e.g. "user_input", "tool_call" or "tool_response". */
  type: string;
  /** Payload of the entry; its shape varies by `type`. */
  data: any;
}

/** An ordered list of events that is serialized and handed to the LLM. */
export class Thread {
  events: Event[];

  constructor(events: Event[]) {
    this.events = events;
  }

  /**
   * Render the event list as the string passed to the model.
   *
   * This is the hook for any custom serialization (XML, etc.), e.g.
   * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
   */
  serializeForLLM() {
    const serialized = JSON.stringify(this.events);
    return serialized;
  }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Run one calculator tool call and append its result to the thread.
 *
 * @param nextStep - tool call chosen by the model; `intent` selects the operation applied to `a` and `b`
 * @param thread - thread to record the result on (mutated in place)
 * @returns the same thread, with a `tool_response` event appended
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Unreachable for well-typed callers; fail loudly instead of resolving
      // to undefined when a malformed step slips through at runtime.
      throw new Error(`unsupported calculator intent: ${(nextStep as { intent: string }).intent}`);
  }
  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Drive the agent until it needs a human.
 *
 * Each iteration asks the model for the next step, records that step on the
 * thread as a `tool_call` event, and then either executes a calculator tool
 * and loops again or returns.
 *
 * @param thread - thread to continue; events are appended to it
 * @returns the thread once the model answers with `done_for_now` or
 *          `request_more_information`; the last event is that step
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
      case "request_more_information":
        // response to human, return the thread
        return thread;
      case "add":
      case "subtract":
      case "multiply":
      case "divide":
        thread = await handleNextStep(nextStep, thread);
        break;
    }
  }
}
================================================
FILE: workshops/2025-05/sections/07-context-window/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point.
 *
 * Joins the command-line arguments into one user message, runs the agent
 * loop, keeps prompting on stdin while the agent asks for more information,
 * then prints the final message and exits.
 */
export async function cli() {
  // Everything after `node <script>` is the user's message.
  const userArgs = process.argv.slice(2);
  if (userArgs.length === 0) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // Seed the thread with the user's message as its first event.
  const thread = new Thread([{ type: "user_input", data: userArgs.join(" ") }]);

  let lastEvent = (await agentLoop(thread)).events.slice(-1)[0];

  // Keep answering clarification requests until the agent stops asking.
  while (lastEvent.data.intent === "request_more_information") {
    const answer = await askHuman(lastEvent.data.message);
    thread.events.push({ type: "human_response", data: answer });
    lastEvent = (await agentLoop(thread)).events.slice(-1)[0];
  }

  // print the final result
  // optional - you could loop here too
  console.log(lastEvent.data.message);
  process.exit(0);
}
/**
 * Print `message` as a prompt on stdout and resolve with the line typed on stdin.
 *
 * The readline interface is closed as soon as the answer arrives, so repeated
 * calls (one per clarification round) do not accumulate open interfaces on
 * the same stdin stream.
 *
 * @param message - text to show before the `> ` prompt
 * @returns the line the user entered
 */
async function askHuman(message: string): Promise<string> {
  const rl = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise<string>((resolve) => {
    rl.question(`${message}\n> `, (answer: string) => {
      rl.close();
      resolve(answer);
    });
  });
}
================================================
FILE: workshops/2025-05/sections/07-context-window/src/index.ts
================================================
import { cli } from "./cli"
/** Prints a greeting. Not called anywhere in this file. */
async function hello(): Promise<void> {
  console.log('hello, world!')
}

/** Entry point: hand control to the command-line interface. */
async function main() {
  await cli()
}

// Log any rejection from main() to stderr.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/07-context-window/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/07-context-window/walkthrough/07-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's history. */
export interface Event {
  /** Kind of entry, e.g. "user_input", "tool_call" or "tool_response". */
  type: string;
  /** Payload of the entry; its shape varies by `type`. */
  data: any;
}

/** An ordered list of events that is serialized and handed to the LLM. */
export class Thread {
  events: Event[];

  constructor(events: Event[]) {
    this.events = events;
  }

  /**
   * Render the event list as pretty-printed JSON (2-space indent) for the model.
   *
   * This is the hook for any custom serialization (XML, etc.), e.g.
   * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
   */
  serializeForLLM() {
    const indent = 2;
    return JSON.stringify(this.events, null, indent);
  }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Run one calculator tool call and append its result to the thread.
 *
 * @param nextStep - tool call chosen by the model; `intent` selects the operation applied to `a` and `b`
 * @param thread - thread to record the result on (mutated in place)
 * @returns the same thread, with a `tool_response` event appended
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Unreachable for well-typed callers; fail loudly instead of resolving
      // to undefined when a malformed step slips through at runtime.
      throw new Error(`unsupported calculator intent: ${(nextStep as { intent: string }).intent}`);
  }
  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Drive the agent until it needs a human.
 *
 * Each iteration asks the model for the next step, records that step on the
 * thread as a `tool_call` event, and then either executes a calculator tool
 * and loops again or returns.
 *
 * @param thread - thread to continue; events are appended to it
 * @returns the thread once the model answers with `done_for_now` or
 *          `request_more_information`; the last event is that step
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
      case "request_more_information":
        // response to human, return the thread
        return thread;
      case "add":
      case "subtract":
      case "multiply":
      case "divide":
        thread = await handleNextStep(nextStep, thread);
        break;
    }
  }
}
================================================
FILE: workshops/2025-05/sections/07-context-window/walkthrough/07b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's history. */
export interface Event {
  /** Kind of entry, e.g. "user_input", "tool_call" or "tool_response". */
  type: string
  /** Payload of the entry; its shape varies by `type`. */
  data: any;
}

/** An ordered list of events, serialized for the LLM as XML-style blocks. */
export class Thread {
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  /** Serialize every event and join the resulting blocks with a newline. */
  serializeForLLM() {
    return this.events.map(e => this.serializeOneEvent(e)).join("\n");
  }

  /** Strip leading spaces and tabs from every line of `s`. */
  trimLeadingWhitespace(s: string) {
    return s.replace(/^[ \t]+/gm, '');
  }

  /**
   * Render one event as a block with a matching opening and closing tag.
   *
   * The tag is the event's `data.intent` when present, otherwise its `type`.
   * Object payloads become one `key: value` line per key (skipping `intent`);
   * any other payload is written as-is.
   */
  serializeOneEvent(e: Event) {
    const tag = e.data?.intent || e.type;
    // typeof null is 'object', so null is handled explicitly to keep
    // Object.keys from throwing on it.
    const body = e.data === null || typeof e.data !== 'object'
      ? e.data
      : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n");
    return this.trimLeadingWhitespace(["", `<${tag}>`, `${body}`, `</${tag}>`, ""].join("\n"));
  }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Run one calculator tool call and append its result to the thread.
 *
 * @param nextStep - tool call chosen by the model; `intent` selects the operation applied to `a` and `b`
 * @param thread - thread to record the result on (mutated in place)
 * @returns the same thread, with a `tool_response` event appended
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Unreachable for well-typed callers; fail loudly instead of resolving
      // to undefined when a malformed step slips through at runtime.
      throw new Error(`unsupported calculator intent: ${(nextStep as { intent: string }).intent}`);
  }
  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Drive the agent until it needs a human.
 *
 * Each iteration asks the model for the next step, records that step on the
 * thread as a `tool_call` event, and then either executes a calculator tool
 * and loops again or returns.
 *
 * @param thread - thread to continue; events are appended to it
 * @returns the thread once the model answers with `done_for_now` or
 *          `request_more_information`; the last event is that step
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
      case "request_more_information":
        // response to human, return the thread
        return thread;
      case "add":
      case "subtract":
      case "multiply":
      case "divide":
        thread = await handleNextStep(nextStep, thread);
        break;
    }
  }
}
================================================
FILE: workshops/2025-05/sections/07-context-window/walkthrough/07c-agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the steps addressed to a person rather than to a
// calculator tool.
type HumanTools = ClarificationRequest | DoneForNow
// Step chosen when the model needs more input; `message` carries the question.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Step chosen when the model reports on the work that was done.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Ask the model for the next step given the serialized thread.
// Returns either a human-facing step (HumanTools) or a calculator tool call.
// The text after {{ ctx.output_format }} instructs the model to think through
// what to do next before the structured answer.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// Smoke test: the thread holds only a greeting, and the assertion expects the
// model to respond by asking the user for more information.
// The thread uses the XML-style block format: one <type>...</type> block per event.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}
// Single-step request: the assertion expects the model to choose the
// "multiply" tool for a plain multiplication question.
// The thread uses the XML-style block format: one <type>...</type> block per event.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}
// Multi-step request replayed with every tool call and tool response already
// in the thread. The assertions expect the model to finish ("done_for_now")
// and to mention the final value 18 in its message.
// Tool calls are tagged with their intent (<multiply>, <divide>, <add>);
// every other event is tagged with its type.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
      </user_input>


      <multiply>
      a: 3
      b: 4
      </multiply>


      <tool_response>
      12
      </tool_response>


      <divide>
      a: 12
      b: 2
      </divide>


      <tool_response>
      6
      </tool_response>


      <add>
      a: 6
      b: 12
      </add>


      <tool_response>
      18
      </tool_response>
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}
// One operand is garbage text, so the assertion expects the model to ask for
// clarification instead of calling a calculator tool.
// The thread uses the XML-style block format: one <type>...</type> block per event.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and fe1iiaff10
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}
// Replays a thread in which the model already asked for clarification and the
// human answered. The assertions expect the model to proceed with the
// multiplication, using the original 3 and the corrected 12.
// The clarification request is tagged with its intent; the other events are
// tagged with their type.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and FD*(#F&& ?
      </user_input>

      <request_more_information>
      message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
      </request_more_information>

      <human_response>
      lets try 12 instead
      </human_response>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // Each label names the field it checks, so a failure report points at the right operand.
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/README.md
================================================
# Chapter 8 - Adding API Endpoints
Add an Express server to expose the agent via HTTP.
For this section, we'll disable the BAML logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Install Express and types
npm install express && npm install --save-dev @types/express supertest
Add the server implementation
cp ./walkthrough/08-server.ts src/server.ts
show file
```ts
// ./walkthrough/08-server.ts
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
const result = await agentLoop(thread);
res.json(result);
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
// optional - add state
res.status(404).json({ error: "Not implemented yet" });
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };
```
Start the server
npx tsx src/server.ts
Test with curl (in another terminal)
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you add 3 and 4"}'
You should get an answer from the agent which includes the
agentic trace, ending in a message like:
{"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the two ways the model can hand the conversation
// back to a person: asking for more input, or reporting that it is finished.
type HumanTools = ClarificationRequest | DoneForNow
// Variant chosen when the model needs more input before it can continue.
// The literal `intent` value identifies this variant in the parsed output.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant chosen when the model considers the current request finished.
// `message` carries the text meant for the user.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// DetermineNextStep asks the model to choose exactly one next action.
//   thread: the conversation so far, passed in as a single string.
//   returns: one HumanTools variant or one CalculatorTools variant
//            (CalculatorTools is not declared in this file).
// The prompt injects the thread, then the generated output-format
// instructions, then asks the model to reason before emitting the object.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// A bare greeting contains no task, so the expected step is a clarification request.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
hello!
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// A single, fully specified multiplication should map directly to the multiply tool.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4?
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// A thread in which all three calculation steps already have results: the model
// should finish and mention the final value (18) in its message.
// NOTE(review): the thread text below has no event tags around the a/b pairs and
// results — presumably they were lost somewhere; confirm against the serialized
// thread format before relying on this test.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
a: 3
b: 4
12
a: 12
b: 2
6
a: 6
b: 12
18
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// The second operand is gibberish, so the model should ask for clarification
// instead of guessing a number.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and fe1iiaff10
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// After the human replaces the garbled operand, the model should resume the
// original request: multiply 3 (from the first message) by 12 (from the reply).
// Each assertion is labelled with the field it actually checks, so a failure
// report names the right operand.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and FD*(#F&& ?
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
lets try 12 instead
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// Named LLM clients and retry policies. API keys are read from environment
// variables (OPENAI_API_KEY / ANTHROPIC_API_KEY), never hard-coded here.

// OpenAI gpt-4o with no retry policy attached.
client CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
// OpenAI gpt-4o-mini, retried according to the Exponential policy below.
client CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
// Anthropic Claude 3.5 Sonnet with no retry policy attached.
client CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
// Anthropic Claude 3 Haiku, retried according to the Constant policy below.
client CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// NOTE(review): both strategy entries name the same client, so the fallback
// has no different client to fall back to — confirm whether a second,
// distinct client was intended.
client OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds
strategy [CustomGPT4oMini, CustomGPT4oMini]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Up to 3 retries with a fixed 200 ms delay.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Up to 2 retries; delay starts at 300 ms, is multiplied by 1.5, and is capped at 10 s.
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/generators.baml
================================================
// This helps you auto-generate client libraries in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/tool_calculator.baml
================================================
// CalculatorTools is the union of the four arithmetic tool calls the model may emit.
// Every variant has the same shape: a literal `intent` naming the operation
// and two numeric operands `a` and `b`, each an int or a float.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Request to add two operands.
class AddTool {
intent "add"
a int | float
b int | float
}
// Request to subtract; how a and b are combined is decided by the code that handles the call.
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// Request to multiply two operands.
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// Request to divide; nothing in this schema prevents b from being 0.
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/**
 * One entry in a thread's history.
 */
export interface Event {
// Kind of entry; values used in this project include "user_input",
// "tool_call", "tool_response" and "human_response".
type: string
// Payload of the entry: a plain value (e.g. the user's text or a numeric
// tool result) or an object such as a parsed tool call carrying `intent`.
data: any;
}
/**
 * Ordered history of one conversation, plus the logic that renders it into
 * the text handed to the LLM.
 */
export class Thread {
  /** Event log in chronological order; callers append to it directly. */
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  /** Renders every event as a tagged block and joins the blocks with a newline. */
  serializeForLLM() {
    return this.events.map(e => this.serializeOneEvent(e)).join("\n");
  }

  /** Removes leading spaces and tabs from every line of `s`. */
  trimLeadingWhitespace(s: string) {
    return s.replace(/^[ \t]+/gm, '');
  }

  /**
   * Renders one event as
   *
   *   <tag>
   *   body
   *   </tag>
   *
   * where `tag` is the payload's `intent` when it has one, otherwise the
   * event type. Object payloads are listed as one `key: value` line per
   * property (the `intent` key is omitted because it is already the tag);
   * any other payload is interpolated as-is.
   */
  serializeOneEvent(e: Event) {
    const tag = e.data?.intent || e.type;
    // `typeof null` is 'object', so null must be excluded explicitly before
    // calling Object.keys, which throws on null.
    const isRecord = typeof e.data === 'object' && e.data !== null;
    const body = isRecord
      ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
      : e.data;
    // The closing tag carries the leading slash so the block is well-formed.
    return this.trimLeadingWhitespace(`
      <${tag}>
      ${body}
      </${tag}>
    `);
  }
}
// Union of the four arithmetic tool-call types imported from ../baml_client;
// this is the set of steps handleNextStep knows how to execute.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation and
 *                 `a` / `b` are its operands.
 * @param thread   the conversation to append the result to (mutated in place).
 * @returns the same `thread` instance, now ending in a `tool_response` event
 *          whose `data` is the numeric result.
 * @throws Error (as a rejected promise) when `intent` is not one of the four
 *         known operations, instead of silently resolving to `undefined`.
 *
 * Division by zero is not special-cased: whatever JavaScript produces
 * (Infinity or NaN) is recorded as the result.
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Only reachable when a caller bypasses the type system (e.g. passes `any`).
      throw new Error(`unknown calculator intent: ${String((nextStep as { intent?: unknown }).intent)}`);
  }

  // Logging and bookkeeping are identical for every operation.
  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Drives the agent until it needs a human.
 *
 * Each iteration serializes the thread, asks `b.DetermineNextStep` for the
 * next step, and appends that step as a `tool_call` event. Calculator steps
 * are executed via `handleNextStep` and the loop continues; a
 * `done_for_now` or `request_more_information` step ends the loop.
 *
 * @param thread the conversation to advance (mutated in place).
 * @returns the thread, whose last event is the human-facing tool call.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
      case "request_more_information":
        // response to human, return the thread
        return thread;
      case "add":
      case "subtract":
      case "multiply":
      case "divide":
        thread = await handleNextStep(nextStep, thread);
        break;
    }
  }
}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: treats all CLI arguments as one user message,
 * runs the agent on it, and keeps prompting on stdin for as long as the agent
 * asks for more information. Prints the agent's final message and exits 0;
 * exits 1 with an error when no arguments were given.
 */
export async function cli() {
  // argv[0] is the node binary and argv[1] the script path
  const userArgs = process.argv.slice(2);
  if (userArgs.length === 0) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // The whole argument list forms a single message, seeding the thread
  const initialMessage = userArgs.join(" ");
  const thread = new Thread([{ type: "user_input", data: initialMessage }]);

  const firstPass = await agentLoop(thread);
  let lastEvent = firstPass.events[firstPass.events.length - 1];

  // Each clarification request becomes a prompt; the answer is appended to
  // the thread and the agent is run again
  while (lastEvent.data.intent === "request_more_information") {
    const answer = await askHuman(lastEvent.data.message);
    thread.events.push({ type: "human_response", data: answer });
    const nextPass = await agentLoop(thread);
    lastEvent = nextPass.events[nextPass.events.length - 1];
  }

  // print the final result
  // optional - you could loop here too
  console.log(lastEvent.data.message);
  process.exit(0);
}
/**
 * Prints `message` followed by a "> " prompt on stdout and resolves with the
 * line the user types on stdin.
 *
 * A fresh readline interface is created per question and closed as soon as
 * the answer arrives, so repeated questions do not accumulate open
 * interfaces on the same stdin stream.
 */
async function askHuman(message: string): Promise<string> {
  const rl = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise<string>((resolve) => {
    rl.question(`${message}\n> `, (answer: string) => {
      rl.close();
      resolve(answer);
    });
  });
}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/src/index.ts
================================================
import { cli } from "./cli"
// Prints a greeting. Not called anywhere in this file.
// The promise resolves with no value, hence Promise<void>.
async function hello(): Promise<void> {
  console.log('hello, world!')
}
// Program entry point: delegates to the command-line interface.
async function main() {
  await cli()
}

// Log any failure and mark the run as failed through the exit status, so that
// shells and scripts invoking this program can detect the error.
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/08-api-endpoints/walkthrough/08-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
// HTTP front end for the agent: one endpoint that starts a thread and runs the
// agent loop on it, plus a placeholder for looking a thread up later.
const app = express();
app.use(express.json());
app.set('json spaces', 2);

// POST /thread - Start new thread
// Body: { "message": string }. Responds with the thread after the agent loop
// has run, or with 400 when `message` is missing, empty or not a string.
app.post('/thread', async (req, res) => {
  // req.body may be undefined when no JSON body was parsed, so guard the access
  const message = req.body?.message;
  if (typeof message !== 'string' || message.trim() === '') {
    res.status(400).json({ error: "Request body must include a non-empty string `message`" });
    return;
  }

  const thread = new Thread([{
    type: "user_input",
    data: message
  }]);
  const result = await agentLoop(thread);
  res.json(result);
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
  // optional - add state
  res.status(404).json({ error: "Not implemented yet" });
});

// PORT from the environment wins; 3000 is the default
const port = process.env.PORT || 3000;
app.listen(port, () => {
  console.log(`Server running on port ${port}`);
});

export { app };
================================================
FILE: workshops/2025-05/sections/09-state-management/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/09-state-management/README.md
================================================
# Chapter 9 - In-Memory State and Async Clarification
Add state management and async clarification support.
For this section, we'll disable the BAML logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Add some simple in-memory state management for threads
cp ./walkthrough/09-state.ts src/state.ts
show file
```ts
// ./walkthrough/09-state.ts
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
// In-memory thread registry keyed by a generated UUID.
export class ThreadStore {
  // Map needs both type arguments: thread id -> thread
  private threads: Map<string, Thread> = new Map();

  // Stores `thread` under a fresh random UUID and returns that id.
  create(thread: Thread): string {
    const id = crypto.randomUUID();
    this.threads.set(id, thread);
    return id;
  }

  // Returns the thread stored under `id`, or undefined when there is none.
  get(id: string): Thread | undefined {
    return this.threads.get(id);
  }

  // Stores `thread` under `id`, replacing any previous entry.
  update(id: string, thread: Thread): void {
    this.threads.set(id, thread);
  }
}
```
update the server to use the state management
* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id
* implement POST /thread/:id/response
```diff
src/server.ts
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
+import { ThreadStore } from '../src/state';
const app = express();
app.set('json spaces', 2);
+const store = new ThreadStore();
+
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
data: req.body.message
}]);
- const result = await agentLoop(thread);
- res.json(result);
+
+ const threadId = store.create(thread);
+ const newThread = await agentLoop(thread);
+
+ store.update(threadId, newThread);
+
+ const lastEvent = newThread.events[newThread.events.length - 1];
+ // If we exited the loop, include the response URL so the client can
+ // push a new message onto the thread
+ lastEvent.data.response_url = `/thread/${threadId}/response`;
+
+ console.log("returning last event from endpoint", lastEvent);
+
+ res.json({
+ thread_id: threadId,
+ ...newThread
+ });
});
app.get('/thread/:id', (req, res) => {
- // optional - add state
- res.status(404).json({ error: "Not implemented yet" });
+ const thread = store.get(req.params.id);
+ if (!thread) {
+ return res.status(404).json({ error: "Thread not found" });
+ }
+ res.json(thread);
});
+// POST /thread/:id/response - Handle clarification response
+app.post('/thread/:id/response', async (req, res) => {
+ let thread = store.get(req.params.id);
+ if (!thread) {
+ return res.status(404).json({ error: "Thread not found" });
+ }
+
+ thread.events.push({
+ type: "human_response",
+ data: req.body.message
+ });
+
+ // loop until stop event
+ const newThread = await agentLoop(thread);
+
+ store.update(req.params.id, newThread);
+
+ const lastEvent = newThread.events[newThread.events.length - 1];
+ lastEvent.data.response_url = `/thread/${req.params.id}/response`;
+
+ console.log("returning last event from endpoint", lastEvent);
+
+ res.json(newThread);
+});
+
const port = process.env.PORT || 3000;
app.listen(port, () => {
```
skip this step
cp ./walkthrough/09-server.ts src/server.ts
Start the server
npx tsx src/server.ts
Test clarification flow
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you multiply 3 and xyz"}'
================================================
FILE: workshops/2025-05/sections/09-state-management/baml_src/agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the two ways the model can hand the conversation
// back to a person: asking for more input, or reporting that it is finished.
type HumanTools = ClarificationRequest | DoneForNow
// Variant chosen when the model needs more input before it can continue.
// The literal `intent` value identifies this variant in the parsed output.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant chosen when the model considers the current request finished.
// `message` carries the text meant for the user.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// DetermineNextStep asks the model to choose exactly one next action.
//   thread: the conversation so far, passed in as a single string.
//   returns: one HumanTools variant or one CalculatorTools variant
//            (CalculatorTools is not declared in this file).
// The prompt injects the thread, then the generated output-format
// instructions, then asks the model to reason before emitting the object.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// A bare greeting contains no task, so the expected step is a clarification request.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
hello!
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// A single, fully specified multiplication should map directly to the multiply tool.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4?
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// A thread in which all three calculation steps already have results: the model
// should finish and mention the final value (18) in its message.
// NOTE(review): the thread text below has no event tags around the a/b pairs and
// results — presumably they were lost somewhere; confirm against the serialized
// thread format before relying on this test.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
a: 3
b: 4
12
a: 12
b: 2
6
a: 6
b: 12
18
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// The second operand is gibberish, so the model should ask for clarification
// instead of guessing a number.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and fe1iiaff10
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// After the human replaces the garbled operand, the model should resume the
// original request: multiply 3 (from the first message) by 12 (from the reply).
// Each assertion is labelled with the field it actually checks, so a failure
// report names the right operand.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and FD*(#F&& ?
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
lets try 12 instead
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/sections/09-state-management/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// Named LLM clients and retry policies. API keys are read from environment
// variables (OPENAI_API_KEY / ANTHROPIC_API_KEY), never hard-coded here.

// OpenAI gpt-4o with no retry policy attached.
client CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
// OpenAI gpt-4o-mini, retried according to the Exponential policy below.
client CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
// Anthropic Claude 3.5 Sonnet with no retry policy attached.
client CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
// Anthropic Claude 3 Haiku, retried according to the Constant policy below.
client CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// NOTE(review): both strategy entries name the same client, so the fallback
// has no different client to fall back to — confirm whether a second,
// distinct client was intended.
client OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds
strategy [CustomGPT4oMini, CustomGPT4oMini]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Up to 3 retries with a fixed 200 ms delay.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Up to 2 retries; delay starts at 300 ms, is multiplied by 1.5, and is capped at 10 s.
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/09-state-management/baml_src/generators.baml
================================================
// This helps you auto-generate client libraries in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/09-state-management/baml_src/tool_calculator.baml
================================================
// CalculatorTools is the union of the four arithmetic tool calls the model may emit.
// Every variant has the same shape: a literal `intent` naming the operation
// and two numeric operands `a` and `b`, each an int or a float.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Request to add two operands.
class AddTool {
intent "add"
a int | float
b int | float
}
// Request to subtract; how a and b are combined is decided by the code that handles the call.
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// Request to multiply two operands.
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// Request to divide; nothing in this schema prevents b from being 0.
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/09-state-management/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"express": "^5.1.0",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/express": "^5.0.1",
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0",
"supertest": "^7.1.0"
}
}
================================================
FILE: workshops/2025-05/sections/09-state-management/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/**
 * One entry in a thread's history.
 */
export interface Event {
// Kind of entry; values used in this project include "user_input",
// "tool_call", "tool_response" and "human_response".
type: string
// Payload of the entry: a plain value (e.g. the user's text or a numeric
// tool result) or an object such as a parsed tool call carrying `intent`.
data: any;
}
/**
 * Ordered history of one conversation, plus the logic that renders it into
 * the text handed to the LLM.
 */
export class Thread {
  /** Event log in chronological order; callers append to it directly. */
  events: Event[] = [];

  constructor(events: Event[]) {
    this.events = events;
  }

  /** Renders every event as a tagged block and joins the blocks with a newline. */
  serializeForLLM() {
    return this.events.map(e => this.serializeOneEvent(e)).join("\n");
  }

  /** Removes leading spaces and tabs from every line of `s`. */
  trimLeadingWhitespace(s: string) {
    return s.replace(/^[ \t]+/gm, '');
  }

  /**
   * Renders one event as
   *
   *   <tag>
   *   body
   *   </tag>
   *
   * where `tag` is the payload's `intent` when it has one, otherwise the
   * event type. Object payloads are listed as one `key: value` line per
   * property (the `intent` key is omitted because it is already the tag);
   * any other payload is interpolated as-is.
   */
  serializeOneEvent(e: Event) {
    const tag = e.data?.intent || e.type;
    // `typeof null` is 'object', so null must be excluded explicitly before
    // calling Object.keys, which throws on null.
    const isRecord = typeof e.data === 'object' && e.data !== null;
    const body = isRecord
      ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
      : e.data;
    // The closing tag carries the leading slash so the block is well-formed.
    return this.trimLeadingWhitespace(`
      <${tag}>
      ${body}
      </${tag}>
    `);
  }
}
// Union of the four arithmetic tool-call types imported from ../baml_client;
// this is the set of steps handleNextStep knows how to execute.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation and
 *                 `a` / `b` are its operands.
 * @param thread   the conversation to append the result to (mutated in place).
 * @returns the same `thread` instance, now ending in a `tool_response` event
 *          whose `data` is the numeric result.
 * @throws Error (as a rejected promise) when `intent` is not one of the four
 *         known operations, instead of silently resolving to `undefined`.
 *
 * Division by zero is not special-cased: whatever JavaScript produces
 * (Infinity or NaN) is recorded as the result.
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
  let result: number;
  switch (nextStep.intent) {
    case "add":
      result = nextStep.a + nextStep.b;
      break;
    case "subtract":
      result = nextStep.a - nextStep.b;
      break;
    case "multiply":
      result = nextStep.a * nextStep.b;
      break;
    case "divide":
      result = nextStep.a / nextStep.b;
      break;
    default:
      // Only reachable when a caller bypasses the type system (e.g. passes `any`).
      throw new Error(`unknown calculator intent: ${String((nextStep as { intent?: unknown }).intent)}`);
  }

  // Logging and bookkeeping are identical for every operation.
  console.log("tool_response", result);
  thread.events.push({
    "type": "tool_response",
    "data": result
  });
  return thread;
}
/**
 * Drives the agent until it needs a human.
 *
 * Each iteration serializes the thread, asks `b.DetermineNextStep` for the
 * next step, and appends that step as a `tool_call` event. Calculator steps
 * are executed via `handleNextStep` and the loop continues; a
 * `done_for_now` or `request_more_information` step ends the loop.
 *
 * @param thread the conversation to advance (mutated in place).
 * @returns the thread, whose last event is the human-facing tool call.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
  while (true) {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    console.log("nextStep", nextStep);

    thread.events.push({
      "type": "tool_call",
      "data": nextStep
    });

    switch (nextStep.intent) {
      case "done_for_now":
      case "request_more_information":
        // response to human, return the thread
        return thread;
      case "add":
      case "subtract":
      case "multiply":
      case "divide":
        thread = await handleNextStep(nextStep, thread);
        break;
    }
  }
}
================================================
FILE: workshops/2025-05/sections/09-state-management/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: treats all CLI arguments as one user message,
 * runs the agent on it, and keeps prompting on stdin for as long as the agent
 * asks for more information. Prints the agent's final message and exits 0;
 * exits 1 with an error when no arguments were given.
 */
export async function cli() {
  // argv[0] is the node binary and argv[1] the script path
  const userArgs = process.argv.slice(2);
  if (userArgs.length === 0) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // The whole argument list forms a single message, seeding the thread
  const initialMessage = userArgs.join(" ");
  const thread = new Thread([{ type: "user_input", data: initialMessage }]);

  const firstPass = await agentLoop(thread);
  let lastEvent = firstPass.events[firstPass.events.length - 1];

  // Each clarification request becomes a prompt; the answer is appended to
  // the thread and the agent is run again
  while (lastEvent.data.intent === "request_more_information") {
    const answer = await askHuman(lastEvent.data.message);
    thread.events.push({ type: "human_response", data: answer });
    const nextPass = await agentLoop(thread);
    lastEvent = nextPass.events[nextPass.events.length - 1];
  }

  // print the final result
  // optional - you could loop here too
  console.log(lastEvent.data.message);
  process.exit(0);
}
/**
 * Prints `message` followed by a "> " prompt on stdout and resolves with the
 * line the user types on stdin.
 *
 * A fresh readline interface is created per question and closed as soon as
 * the answer arrives, so repeated questions do not accumulate open
 * interfaces on the same stdin stream.
 */
async function askHuman(message: string): Promise<string> {
  const rl = require('readline').createInterface({
    input: process.stdin,
    output: process.stdout
  });

  return new Promise<string>((resolve) => {
    rl.question(`${message}\n> `, (answer: string) => {
      rl.close();
      resolve(answer);
    });
  });
}
================================================
FILE: workshops/2025-05/sections/09-state-management/src/index.ts
================================================
import { cli } from "./cli"
// Prints a greeting. Not called anywhere in this file.
// The promise resolves with no value, hence Promise<void>.
async function hello(): Promise<void> {
  console.log('hello, world!')
}
// Program entry point: delegates to the command-line interface.
async function main() {
  await cli()
}

// Log any failure and mark the run as failed through the exit status, so that
// shells and scripts invoking this program can detect the error.
main().catch((err) => {
  console.error(err)
  process.exitCode = 1
})
================================================
FILE: workshops/2025-05/sections/09-state-management/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
// HTTP front end for the agent: one endpoint that starts a thread and runs the
// agent loop on it, plus a placeholder for looking a thread up later.
const app = express();
app.use(express.json());
app.set('json spaces', 2);

// POST /thread - Start new thread
// Body: { "message": string }. Responds with the thread after the agent loop
// has run, or with 400 when `message` is missing, empty or not a string.
app.post('/thread', async (req, res) => {
  // req.body may be undefined when no JSON body was parsed, so guard the access
  const message = req.body?.message;
  if (typeof message !== 'string' || message.trim() === '') {
    res.status(400).json({ error: "Request body must include a non-empty string `message`" });
    return;
  }

  const thread = new Thread([{
    type: "user_input",
    data: message
  }]);
  const result = await agentLoop(thread);
  res.json(result);
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
  // optional - add state
  res.status(404).json({ error: "Not implemented yet" });
});

// PORT from the environment wins; 3000 is the default
const port = process.env.PORT || 3000;
app.listen(port, () => {
  console.log(`Server running on port ${port}`);
});

export { app };
================================================
FILE: workshops/2025-05/sections/09-state-management/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/09-state-management/walkthrough/09-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';
// HTTP front end for the agent with in-memory thread state: start a thread,
// read it back, and post a human reply to a paused thread.
const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// Returns the `message` field of a parsed JSON body when it is a non-empty
// string, otherwise undefined (also when the body itself is missing).
function readMessage(body: unknown): string | undefined {
  const message = (body as { message?: unknown } | null | undefined)?.message;
  return typeof message === 'string' && message.trim() !== '' ? message : undefined;
}

// POST /thread - Start new thread
// Body: { "message": string }. Responds with { thread_id, events }.
app.post('/thread', async (req, res) => {
  // Validate before touching the store so a bad request leaves no orphan thread
  const message = readMessage(req.body);
  if (message === undefined) {
    res.status(400).json({ error: "Request body must include a non-empty string `message`" });
    return;
  }

  const thread = new Thread([{
    type: "user_input",
    data: message
  }]);

  const threadId = store.create(thread);
  const newThread = await agentLoop(thread);

  store.update(threadId, newThread);

  const lastEvent = newThread.events[newThread.events.length - 1];
  // If we exited the loop, include the response URL so the client can
  // push a new message onto the thread
  // NOTE(review): this writes onto the stored event's data, so response_url
  // is also part of what Thread.serializeOneEvent renders for later LLM calls.
  lastEvent.data.response_url = `/thread/${threadId}/response`;

  console.log("returning last event from endpoint", lastEvent);

  res.json({
    thread_id: threadId,
    ...newThread
  });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
  const thread = store.get(req.params.id);
  if (!thread) {
    // send, then return nothing: handlers are typed as returning void
    res.status(404).json({ error: "Thread not found" });
    return;
  }
  res.json(thread);
});

// POST /thread/:id/response - Handle clarification response
// Body: { "message": string }. Appends the reply to the thread and resumes the agent.
app.post('/thread/:id/response', async (req, res) => {
  const thread = store.get(req.params.id);
  if (!thread) {
    res.status(404).json({ error: "Thread not found" });
    return;
  }

  const message = readMessage(req.body);
  if (message === undefined) {
    res.status(400).json({ error: "Request body must include a non-empty string `message`" });
    return;
  }

  thread.events.push({
    type: "human_response",
    data: message
  });

  // loop until stop event
  const newThread = await agentLoop(thread);

  store.update(req.params.id, newThread);

  const lastEvent = newThread.events[newThread.events.length - 1];
  lastEvent.data.response_url = `/thread/${req.params.id}/response`;

  console.log("returning last event from endpoint", lastEvent);

  res.json(newThread);
});

// PORT from the environment wins; 3000 is the default
const port = process.env.PORT || 3000;
app.listen(port, () => {
  console.log(`Server running on port ${port}`);
});

export { app };
================================================
FILE: workshops/2025-05/sections/09-state-management/walkthrough/09-state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory thread registry keyed by a generated UUID. Contents live only as
 * long as the process does.
 */
export class ThreadStore {
  // Map needs both type arguments: thread id -> thread
  private threads: Map<string, Thread> = new Map();

  /** Stores `thread` under a fresh random UUID and returns that id. */
  create(thread: Thread): string {
    const id = crypto.randomUUID();
    this.threads.set(id, thread);
    return id;
  }

  /** Returns the thread stored under `id`, or undefined when there is none. */
  get(id: string): Thread | undefined {
    return this.threads.get(id);
  }

  /** Stores `thread` under `id`, replacing any previous entry (or creating one). */
  update(id: string, thread: Thread): void {
    this.threads.set(id, thread);
  }
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/10-human-approval/README.md
================================================
# Chapter 10 - Adding Human Approval
Add support for human approval of operations.
For this section, we'll disable the BAML logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
update the server to handle human approvals
* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong
```diff
src/server.ts
import express from 'express';
-import { Thread, agentLoop } from '../src/agent';
+import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
});
+
+type ApprovalPayload = {
+ type: "approval";
+ approved: boolean;
+ comment?: string;
+}
+
+type ResponsePayload = {
+ type: "response";
+ response: string;
+}
+
+type Payload = ApprovalPayload | ResponsePayload;
+
// POST /thread/:id/response - Handle clarification response
app.post('/thread/:id/response', async (req, res) => {
return res.status(404).json({ error: "Thread not found" });
}
+
+ const body: Payload = req.body;
+
+ let lastEvent = thread.events[thread.events.length - 1];
+
+ if (thread.awaitingHumanResponse() && body.type === 'response') {
+ thread.events.push({
+ type: "human_response",
+ data: body.response
+ });
+ } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
+ // push feedback onto the thread
+ thread.events.push({
+ type: "tool_response",
+ data: `user denied the operation with feedback: "${body.comment}"`
+ });
+ } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
+ // approved, run the tool, pushing results onto the thread
+ await handleNextStep(lastEvent.data, thread);
+ } else {
+ res.status(400).json({
+ error: "Invalid request: " + body.type,
+ awaitingHumanResponse: thread.awaitingHumanResponse(),
+ awaitingHumanApproval: thread.awaitingHumanApproval()
+ });
+ return;
+ }
+
- thread.events.push({
- type: "human_response",
- data: req.body.message
- });
-
// loop until stop event
const newThread = await agentLoop(thread);
store.update(req.params.id, newThread);
- const lastEvent = newThread.events[newThread.events.length - 1];
+ lastEvent = newThread.events[newThread.events.length - 1];
lastEvent.data.response_url = `/thread/${req.params.id}/response`;
```
skip this step
cp ./walkthrough/10-server.ts src/server.ts
Add a few methods to the agent to handle approvals and responses
```diff
src/agent.ts
`)
}
+
+ awaitingHumanResponse(): boolean {
+ const lastEvent = this.events[this.events.length - 1];
+ return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent);
+ }
+
+ awaitingHumanApproval(): boolean {
+ const lastEvent = this.events[this.events.length - 1];
+ return lastEvent.data.intent === 'divide';
+ }
}
// response to human, return the thread
return thread;
+ case "divide":
+ // divide is scary, return it for human approval
+ return thread;
case "add":
case "subtract":
case "multiply":
- case "divide":
thread = await handleNextStep(nextStep, thread);
}
```
skip this step
cp ./walkthrough/10-agent.ts src/agent.ts
Start the server
npx tsx src/server.ts
Test division with approval
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you divide 3 by 4"}'
You should see:
{
"thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
}
]
}
reject the request with another curl call, changing the thread ID
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'
You should see: the last tool call is now `"intent":"divide","a":3,"b":5`
{
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
},
{
"type": "tool_response",
"data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 5,
"response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
}
}
]
}
now you can approve the operation
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": true}'
you should see the final message includes the tool response and final result!
...
{
"type": "tool_response",
"data": 0.5
},
{
"type": "done_for_now",
"message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!",
"response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/baml_src/agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow
// Step the model selects to ask the human for more input.
// `intent` is a literal string that identifies this variant in the returned union.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Step the model selects to report back to the human.
// `message` carries the text described by the @description below.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Asks the model which step to take next for a serialized thread.
//   thread: the conversation so far, already rendered to a single string by the caller.
//   returns: one of the HumanTools or CalculatorTools variants.
// Uses the "openai/gpt-4o" shorthand client. The prompt has a system and a user
// section and embeds the output schema via ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// A bare greeting contains no task, so the model should ask for more information.
// The thread uses the <event_type>...</event_type> framing that
// Thread.serializeForLLM produces, so the fixture matches real input.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}
// A single multiplication request should select the multiply tool.
// The thread uses the <event_type>...</event_type> framing that
// Thread.serializeForLLM produces, so the fixture matches real input.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}
// Multi-step fixture: every requested operation has already been executed, so
// the model should finish with done_for_now and report 18.
// Each event is wrapped in the tag Thread.serializeOneEvent would emit
// (intent for tool calls, event type otherwise). Without the tags the
// model cannot tell tool calls from tool results.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
      </user_input>

      <multiply>
      a: 3
      b: 4
      </multiply>

      <tool_response>
      12
      </tool_response>

      <divide>
      a: 12
      b: 2
      </divide>

      <tool_response>
      6
      </tool_response>

      <add>
      a: 6
      b: 12
      </add>

      <tool_response>
      18
      </tool_response>
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}
// A garbled operand should make the model ask for clarification instead of guessing.
// The thread uses the <event_type>...</event_type> framing that
// Thread.serializeForLLM produces, so the fixture matches real input.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and fe1iiaff10
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}
// After the human answers a clarification request, the model should go ahead
// with the multiplication using the corrected operand.
// Events are wrapped in the tags Thread.serializeOneEvent would emit, and each
// assert label now names the field it actually checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and FD*(#F&& ?
      </user_input>

      <request_more_information>
      message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
      </request_more_information>

      <human_response>
      lets try 12 instead
      </human_response>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// BAML client declarations take a type parameter: `client<llm> Name { ... }`.

// OpenAI gpt-4o, authenticated with OPENAI_API_KEY.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI gpt-4o-mini, retried with the Exponential policy.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic Claude 3.5 Sonnet, authenticated with ANTHROPIC_API_KEY.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}

// Anthropic Claude 3 Haiku, retried with the Constant policy.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client; declared with the `client<llm>` form like every other client.
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client; declared with the `client<llm>` form like every other client.
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds
    // NOTE(review): both entries are the same client, so the fallback never
    // reaches a different model - confirm whether a second client was intended.
    strategy [CustomGPT4oMini, CustomGPT4oMini]
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy referenced by CustomHaiku: up to 3 retries, 200 ms apart.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Retry policy referenced by CustomGPT4oMini: up to 2 retries, starting at
// 300 ms, multiplied by 1.5 each time and capped at 10000 ms.
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
// "../" is the directory that contains baml_src/.
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tools the model may select.
// Each class has a literal `intent` string naming the operation, plus two
// numeric operands `a` and `b` (int or float).
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// a + b
class AddTool {
intent "add"
a int | float
b int | float
}
// a - b
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// a * b
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// a / b
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"express": "^5.1.0",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/express": "^5.0.1",
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0",
"supertest": "^7.1.0"
}
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/**
 * One entry in a thread's history.
 *
 * `type` names the kind of event (for example "user_input" or "tool_call");
 * `data` is the untyped payload that goes with it.
 */
export interface Event {
    type: string;
    data: any;
}
/**
 * Ordered list of events plus helpers to render them as text for the LLM.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins them with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strips leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as a tag-delimited block:
     *
     *   <tag>
     *   body
     *   </tag>
     *
     * The tag is `data.intent` when present, otherwise the event type. Object
     * payloads are rendered as `key: value` lines (omitting `intent`); any
     * other payload is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so exclude it before calling Object.keys.
        const body = (e.data !== null && typeof e.data === 'object')
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        // The closing tag needs its leading "</" so the block is balanced.
        return this.trimLeadingWhitespace(`\n<${tag}>\n${body}\n</${tag}>\n`);
    }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records the outcome on the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation applied to `a` and `b`
 * @param thread   the thread to append the `tool_response` event to (mutated in place)
 * @returns the same thread instance, with the result event appended
 * @throws Error when `intent` is not one of the four calculator operations
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed input; fail loudly rather than return
            // undefined, which the caller would then try to use as a thread.
            throw new Error(`unknown calculator intent: ${(nextStep as { intent: string }).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Runs the agent until the model produces a step that needs a human.
 *
 * Each iteration asks the model for the next step, appends it to the thread as
 * a `tool_call` event, then either returns (for `done_for_now` and
 * `request_more_information`) or executes the calculator tool and loops again.
 *
 * @param thread the thread to continue; mutated in place
 * @returns the thread, whose last event is the step that ended the loop
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: treats all CLI arguments as one user message,
 * runs the agent loop, and keeps prompting on stdin while the agent asks for
 * more information. Prints the final message and exits the process.
 */
export async function cli() {
    // Drop the node binary and script path.
    const words = process.argv.slice(2);
    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed the thread with the joined arguments as the user's input.
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let lastEvent = (await agentLoop(thread)).events.slice(-1)[0];

    // Keep answering clarification requests until the agent stops asking.
    while (lastEvent.data.intent === "request_more_information") {
        const answer = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: answer });
        lastEvent = (await agentLoop(thread)).events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}
/**
 * Prints `message` followed by a "> " prompt and resolves with the line the
 * user types on stdin.
 *
 * The readline interface is closed once the answer arrives, so repeated calls
 * do not leave several interfaces attached to stdin.
 */
async function askHuman(message: string): Promise<string> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            readline.close();
            resolve(answer);
        });
    });
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/src/index.ts
================================================
import { cli } from "./cli"
/** Placeholder that is not called anywhere in this file; logs a greeting. */
async function hello(): Promise<void> {
    console.log('hello, world!')
}

/** Program entry point: hands control to the CLI. */
async function main() {
    await cli()
}

// Log any rejection that escapes main instead of leaving it unhandled.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/10-human-approval/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';
// Express app exposing the agent over HTTP. Threads are kept in an in-memory
// ThreadStore and JSON responses are pretty-printed with two spaces.
const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// Returns the request's `message` field when it is a non-empty string,
// otherwise undefined. In Express 5 `req.body` is undefined when no JSON body
// was parsed, so it must not be dereferenced blindly.
function readMessage(body: any): string | undefined {
    const message = body?.message;
    return typeof message === 'string' && message.length > 0 ? message : undefined;
}

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    const message = readMessage(req.body);
    if (message === undefined) {
        res.status(400).json({ error: "Invalid request: 'message' must be a non-empty string" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});

// POST /thread/:id/response - Handle clarification response
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    const message = readMessage(req.body);
    if (message === undefined) {
        res.status(400).json({ error: "Invalid request: 'message' must be a non-empty string" });
        return;
    }

    thread.events.push({
        type: "human_response",
        data: message
    });

    // loop until stop event
    const newThread = await agentLoop(thread);

    store.update(req.params.id, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(newThread);
});

// Listen on PORT from the environment, defaulting to 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };
================================================
FILE: workshops/2025-05/sections/10-human-approval/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory store mapping generated thread ids to Thread objects.
 *
 * Nothing is persisted: every thread lives only as long as this instance.
 */
export class ThreadStore {
    // Keyed by the UUID handed out in create().
    private threads: Map<string, Thread> = new Map();

    /** Stores `thread` under a freshly generated UUID and returns that id. */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }

    /** Returns the thread stored under `id`, or undefined when the id is unknown. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Replaces (or inserts) the thread stored under `id`. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/walkthrough/10-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/**
 * One entry in a thread's history.
 *
 * `type` names the kind of event (for example "user_input" or "tool_call");
 * `data` is the untyped payload that goes with it.
 */
export interface Event {
    type: string;
    data: any;
}
/**
 * Ordered list of events plus helpers to render them as text for the LLM and
 * to tell what kind of human input the thread is waiting for.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins them with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strips leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as a tag-delimited block:
     *
     *   <tag>
     *   body
     *   </tag>
     *
     * The tag is `data.intent` when present, otherwise the event type. Object
     * payloads are rendered as `key: value` lines (omitting `intent`); any
     * other payload is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so exclude it before calling Object.keys.
        const body = (e.data !== null && typeof e.data === 'object')
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        // The closing tag needs its leading "</" so the block is balanced.
        return this.trimLeadingWhitespace(`\n<${tag}>\n${body}\n</${tag}>\n`);
    }

    /**
     * True when the last event's intent is `request_more_information` or
     * `done_for_now`. False for an empty thread or an event without data.
     */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /**
     * True when the last event's intent is `divide`, the one operation that
     * requires approval. False for an empty thread or an event without data.
     */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records the outcome on the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation applied to `a` and `b`
 * @param thread   the thread to append the `tool_response` event to (mutated in place)
 * @returns the same thread instance, with the result event appended
 * @throws Error when `intent` is not one of the four calculator operations
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed input; fail loudly rather than return
            // undefined, which the caller would then try to use as a thread.
            throw new Error(`unknown calculator intent: ${(nextStep as { intent: string }).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Runs the agent until the model produces a step that needs a human.
 *
 * Each iteration asks the model for the next step and appends it to the thread
 * as a `tool_call` event. The loop returns on `done_for_now`,
 * `request_more_information`, and `divide` (which is handed back unexecuted
 * for approval); `add`, `subtract` and `multiply` run immediately.
 *
 * @param thread the thread to continue; mutated in place
 * @returns the thread, whose last event is the step that ended the loop
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/sections/10-human-approval/walkthrough/10-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
// Express app exposing the agent over HTTP, including the human approval flow.
// Threads are kept in an in-memory ThreadStore and JSON responses are
// pretty-printed with two spaces.
const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    // In Express 5 `req.body` is undefined when no JSON body was parsed.
    const message = req.body?.message;
    if (typeof message !== 'string' || message.length === 0) {
        res.status(400).json({ error: "Invalid request: 'message' must be a non-empty string" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});

// Body sent to answer a pending `divide` tool call.
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

// Body sent to answer a clarification request or continue after done_for_now.
type ResponsePayload = {
    type: "response";
    response: string;
}

type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    // May be undefined when the request carried no JSON body.
    const body = req.body as Payload | undefined;

    let lastEvent = thread.events[thread.events.length - 1];

    if (thread.awaitingHumanResponse() && body?.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body?.type === 'approval' && !body.approved) {
        // push feedback onto the thread; an omitted comment is recorded as
        // empty feedback rather than the literal text "undefined"
        thread.events.push({
            type: "tool_response",
            data: `user denied the operation with feedback: "${body.comment ?? ""}"`
        });
    } else if (thread.awaitingHumanApproval() && body?.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(lastEvent.data, thread);
    } else {
        // The payload type does not match what the thread is waiting for.
        res.status(400).json({
            error: "Invalid request: " + body?.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    // loop until stop event
    const newThread = await agentLoop(thread);

    store.update(req.params.id, newThread);

    lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(newThread);
});

// Listen on PORT from the environment, defaulting to 3000.
const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/README.md
================================================
# Chapter 11 - Human Approvals over email
in this section, we'll add support for human approvals over email.
This will start a little bit contrived, just to get the concepts down -
We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI
While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Install HumanLayer
npm install humanlayer
Update CLI to send `divide` and `request_more_information` to a human via email
```diff
src/cli.ts
// cli.ts lets you invoke the agent loop from the command line
+import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
-
-
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
// Run the agent loop with the thread
- const result = await agentLoop(thread);
- let lastEvent = result.events.slice(-1)[0];
+ let newThread = await agentLoop(thread);
+ let lastEvent = newThread.events.slice(-1)[0];
- while (lastEvent.data.intent === "request_more_information") {
- const message = await askHuman(lastEvent.data.message);
- thread.events.push({ type: "human_response", data: message });
- const result = await agentLoop(thread);
- lastEvent = result.events.slice(-1)[0];
+ while (lastEvent.data.intent !== "done_for_now") {
+ const responseEvent = await askHuman(lastEvent);
+ thread.events.push(responseEvent);
+ newThread = await agentLoop(thread);
+ lastEvent = newThread.events.slice(-1)[0];
}
// print the final result
console.log(lastEvent.data.message);
process.exit(0);
}
-async function askHuman(message: string) {
+async function askHuman(lastEvent: Event): Promise {
+ if (process.env.HUMANLAYER_API_KEY) {
+ return await askHumanEmail(lastEvent);
+ } else {
+ return await askHumanCLI(lastEvent.data.message);
+ }
+}
+
+async function askHumanCLI(message: string): Promise {
const readline = require('readline').createInterface({
input: process.stdin,
return new Promise((resolve) => {
readline.question(`${message}\n> `, (answer: string) => {
- resolve(answer);
+ resolve({ type: "human_response", data: answer });
});
});
}
+
+export async function askHumanEmail(lastEvent: Event): Promise {
+ if (!process.env.HUMANLAYER_EMAIL) {
+ throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+ }
+ const hl = humanlayer({ //reads apiKey from env
+ // name of this agent
+ runId: "12fa-cli-agent",
+ verbose: true,
+ contactChannel: {
+ // agent should request permission via email
+ email: {
+ address: process.env.HUMANLAYER_EMAIL,
+ }
+ }
+ })
+
+ if (lastEvent.data.intent === "divide") {
+ // fetch approval synchronously - this will block until reply
+ const response = await hl.fetchHumanApproval({
+ spec: {
+ fn: "divide",
+ kwargs: {
+ a: lastEvent.data.a,
+ b: lastEvent.data.b
+ }
+ }
+ })
+
+ if (response.approved) {
+ const result = lastEvent.data.a / lastEvent.data.b;
+ console.log("tool_response", result);
+ return {
+ "type": "tool_response",
+ "data": result
+ };
+ } else {
+ return {
+ "type": "tool_response",
+ "data": `user denied operation ${lastEvent.data.intent}
+ with feedback: ${response.comment}`
+ };
+ }
+ }
+ throw new Error(`unknown tool: ${lastEvent.data.intent}`)
+}
```
skip this step
cp ./walkthrough/11-cli.ts src/cli.ts
Run the CLI
npx tsx src/index.ts 'can you divide 4 by 5'
The last line of your program's output should mention the human review step
nextStep { intent: 'divide', a: 4, b: 5 }
HumanLayer: Requested human approval from HumanLayer cloud
go ahead and respond to the email with some feedback:

you should get another email with an updated attempt based on your feedback!
You can go ahead and approve this one:

and your final output will look like
nextStep {
intent: 'done_for_now',
message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
}
The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!
lets implement the `request_more_information` flow as well
```diff
src/cli.ts
})
+ if (lastEvent.data.intent === "request_more_information") {
+ // fetch response synchronously - this will block until reply
+ const response = await hl.fetchHumanResponse({
+ spec: {
+ msg: lastEvent.data.message
+ }
+ })
+ return {
+ "type": "tool_response",
+ "data": response
+ }
+ }
+
if (lastEvent.data.intent === "divide") {
// fetch approval synchronously - this will block until reply
```
skip this step
cp ./walkthrough/11b-cli.ts src/cli.ts
let's test the `request_more_information` flow by asking for a calculation
with garbled input:
npx tsx src/index.ts 'can you multiply 4 and xyz'
You should get an email with a request for clarification
Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?
you can respond with something like
use 8 instead of xyz
you should see a final result on the CLI like
I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.
as a final step, lets explore using a custom html template for the email
```diff
src/cli.ts
email: {
address: process.env.HUMANLAYER_EMAIL,
+ // custom email body - jinja
+ template: `{% if type == 'request_more_information' %}
+{{ event.spec.msg }}
+{% else %}
+agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
+with args: {{event.spec.kwargs}}
+
+reply to this email to approve
+{% endif %}`
}
}
```
skip this step
cp ./walkthrough/11c-cli.ts src/cli.ts
first try with divide:
npx tsx src/index.ts 'can you divide 4 by 5'
you should see a slightly different email with the custom template

feel free to run with the flow and then you can try updating the template to your liking
(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)
try triggering "request_more_information" as well!
thats it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow
// Step the model selects to ask the human for more input.
// `intent` is a literal string that identifies this variant in the returned union.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Step the model selects to report back to the human.
// `message` carries the text described by the @description below.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Asks the model which step to take next for a serialized thread.
//   thread: the conversation so far, already rendered to a single string by the caller.
//   returns: one of the HumanTools or CalculatorTools variants.
// Uses the "openai/gpt-4o" shorthand client. The prompt has a system and a user
// section and embeds the output schema via ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// A bare greeting contains no task, so the model should ask for more information.
// The thread uses the <event_type>...</event_type> framing that
// Thread.serializeForLLM produces, so the fixture matches real input.
test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      hello!
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}
// A single multiplication request should select the multiply tool.
// The thread uses the <event_type>...</event_type> framing that
// Thread.serializeForLLM produces, so the fixture matches real input.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and 4?
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}
// Multi-step fixture: every requested operation has already been executed, so
// the model should finish with done_for_now and report 18.
// Each event is wrapped in the tag Thread.serializeOneEvent would emit
// (intent for tool calls, event type otherwise). Without the tags the
// model cannot tell tool calls from tool results.
test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
      </user_input>

      <multiply>
      a: 3
      b: 4
      </multiply>

      <tool_response>
      12
      </tool_response>

      <divide>
      a: 12
      b: 2
      </divide>

      <tool_response>
      6
      </tool_response>

      <add>
      a: 6
      b: 12
      </add>

      <tool_response>
      18
      </tool_response>
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}
// A garbled operand should make the model ask for clarification instead of guessing.
// The thread uses the <event_type>...</event_type> framing that
// Thread.serializeForLLM produces, so the fixture matches real input.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and fe1iiaff10
      </user_input>
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}
// After the human answers a clarification request, the model should go ahead
// with the multiplication using the corrected operand.
// Events are wrapped in the tags Thread.serializeOneEvent would emit, and each
// assert label now names the field it actually checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      <user_input>
      can you multiply 3 and FD*(#F&& ?
      </user_input>

      <request_more_information>
      message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
      </request_more_information>

      <human_response>
      lets try 12 instead
      </human_response>
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// BAML client declarations take a type parameter: `client<llm> Name { ... }`.

// OpenAI gpt-4o, authenticated with OPENAI_API_KEY.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// OpenAI gpt-4o-mini, retried with the Exponential policy.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic Claude 3.5 Sonnet, authenticated with ANTHROPIC_API_KEY.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}

// Anthropic Claude 3 Haiku, retried with the Constant policy.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that tries gpt-4o-mini first and falls back to gpt-4o.
client OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds.
// The list previously named CustomGPT4oMini twice, which only repeated
// the same client instead of falling back to a different one.
strategy [CustomGPT4oMini, CustomGPT4o]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Fixed-delay policy: up to 3 retries, 200 ms between attempts.
// Referenced by CustomHaiku.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Backoff policy: up to 2 retries, starting at 300 ms, multiplied by 1.5
// each time and capped at 10000 ms. Referenced by CustomGPT4oMini.
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
// src/agent.ts imports the generated client as "../baml_client".
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/tool_calculator.baml
================================================
// Union of the four calculator tool classes declared below.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Each tool has a literal `intent` that identifies the operation, plus two
// numeric operands `a` and `b` (int or float).
// src/agent.ts computes a + b for this tool.
class AddTool {
intent "add"
a int | float
b int | float
}
// src/agent.ts computes a - b for this tool.
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// src/agent.ts computes a * b for this tool.
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// src/agent.ts computes a / b for this tool; the agent loop returns before
// running it so a human can approve first.
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"express": "^5.1.0",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/express": "^5.0.1",
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0",
"supertest": "^7.1.0"
}
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/**
 * One entry in a thread's history.
 *
 * `type` is the event kind used in this project ("user_input", "tool_call",
 * "tool_response", "human_response"). `data` is the payload: a plain string
 * or number for inputs and results, or a tool-call object carrying an
 * `intent` field.
 */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered event history for one conversation, plus helpers to render it
 * for the LLM and to inspect what the agent is currently waiting on.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event as a tagged block and joins the blocks with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Removes leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as `<tag>` / body / `</tag>`.
     *
     * The tag is the payload's `intent` when present, otherwise the event type.
     * Object payloads become one `key: value` line per key (excluding `intent`);
     * every other payload is stringified as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so null must be excluded before Object.keys.
        const isRecord = e.data !== null && typeof e.data === 'object';
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : `${e.data}`;
        // The closing tag needs its "</" prefix so the block is well-formed.
        return this.trimLeadingWhitespace(`\n<${tag}>\n${body}\n</${tag}>\n`);
    }

    /** True when the last event is a request for information or a final answer. */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        // Optional chaining keeps an empty thread or a null payload from throwing.
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /** True when the last event is a divide call, which needs human approval. */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}
// Any calculator tool call the model can produce.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and records its result on the thread.
 *
 * @param nextStep tool call with an `intent` and the operands `a` and `b`
 * @param thread   thread to append the `tool_response` event to (mutated in place)
 * @returns the same thread instance, with the result event appended
 * @throws Error when `intent` is not one of the four calculator operations
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Fail fast instead of resolving to undefined, which callers
            // would otherwise assign back as their thread.
            throw new Error(`unknown calculator intent: ${(nextStep as { intent: string }).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Asks the model for next steps until one of them needs a human.
 *
 * Every step is appended to the thread as a `tool_call` event. The loop
 * returns when the step is `done_for_now`, `request_more_information`, or
 * `divide`; add, subtract and multiply are executed immediately.
 *
 * @param thread conversation to advance (mutated in place)
 * @returns the thread, whose last event is the step that stopped the loop
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);
        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });
        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: sends the joined arguments to the agent as the
 * first message, answers clarification requests interactively, prints the
 * final event's message, and exits.
 */
export async function cli() {
    // Drop the node binary and script path; the rest is the user's message.
    const cliArgs = process.argv.slice(2);
    if (cliArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    const thread = new Thread([{ type: "user_input", data: cliArgs.join(" ") }]);

    // First pass through the agent loop.
    const firstPass = await agentLoop(thread);
    let lastEvent = firstPass.events.slice(-1)[0];

    // Keep answering for as long as the agent asks for more information.
    while (lastEvent.data.intent === "request_more_information") {
        const answer = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: answer });
        const nextPass = await agentLoop(thread);
        lastEvent = nextPass.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}
/**
 * Prints `message` on stdout and resolves with the line the user types.
 *
 * A new readline interface is created per question and closed once the
 * answer arrives, so repeated prompts do not pile up interfaces on stdin.
 */
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });
    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve(answer);
        });
    });
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/index.ts
================================================
import { cli } from "./cli"
// Leftover greeting helper; nothing in this file calls it.
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Program entry point: hand control to the CLI.
async function main() {
    await cli()
}

// Log any unhandled failure from the CLI.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
// Express application shared by every route in this file.
const app = express();
// Parse JSON request bodies into req.body.
app.use(express.json());
// Pretty-print JSON responses with two-space indentation.
app.set('json spaces', 2);
// Store that the routes use to save and look up threads by id.
const store = new ThreadStore();
// POST /thread - Start new thread
// Body: { "message": string }. Runs the agent loop until it needs a human,
// then returns the thread id together with the full thread.
app.post('/thread', async (req, res) => {
    // Express 5 leaves req.body undefined when no JSON body was parsed, so
    // read it defensively and reject requests without a usable message.
    const message = req.body?.message;
    if (typeof message !== 'string' || message.length === 0) {
        res.status(400).json({ error: "Invalid request: 'message' must be a non-empty string" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);
    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);
    store.update(threadId, newThread);
    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;
    console.log("returning last event from endpoint", lastEvent);
    res.json({
        thread_id: threadId,
        ...newThread
    });
});
// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // Send, then return nothing: this matches the 400 branch of the
        // /response handler and keeps the handler's return type void.
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});
// Body sent to approve or deny a pending tool call; `comment` is optional feedback.
type ApprovalPayload = {
type: "approval";
approved: boolean;
comment?: string;
}
// Body sent to answer a question from the agent.
type ResponsePayload = {
type: "response";
response: string;
}
// Accepted request bodies for POST /thread/:id/response, discriminated by `type`.
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text answer ({type: "response"}) or an approval
// decision ({type: "approval"}), applies it to the thread, and resumes the
// agent loop. Responds 404 for an unknown thread and 400 when the payload
// does not match what the thread is waiting for.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    // Express 5 leaves req.body undefined when no JSON body was parsed;
    // fall back to an empty object so the request ends in the 400 branch.
    const body = (req.body ?? {}) as Payload;
    let lastEvent = thread.events[thread.events.length - 1];
    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
        // push feedback onto the thread; `comment` is optional, so avoid
        // rendering the literal text "undefined" into the model's context
        thread.events.push({
            type: "tool_response",
            data: `user denied the operation with feedback: "${body.comment ?? ""}"`
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(lastEvent.data, thread);
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }
    // loop until stop event
    const newThread = await agentLoop(thread);
    store.update(req.params.id, newThread);
    lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;
    console.log("returning last event from endpoint", lastEvent);
    res.json(newThread);
});
// Port comes from the PORT environment variable, defaulting to 3000.
const port = process.env.PORT || 3000;
// NOTE(review): this runs at module load, so importing `app` from another
// module also starts the listener.
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Export the configured app so other modules can import it.
export { app };
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory thread storage keyed by a generated UUID.
 * Contents live only as long as the process does.
 */
export class ThreadStore {
    // Map requires both type arguments; ids are UUID strings.
    private threads: Map<string, Thread> = new Map();

    /** Stores `thread` under a fresh random UUID and returns that id. */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }

    /** Returns the thread stored under `id`, or undefined when there is none. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Replaces (or inserts) the thread stored under `id`. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/walkthrough/11-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: sends the joined arguments to the agent, then
 * keeps handing the agent's last event to a human until the agent reports
 * `done_for_now`, prints the final message, and exits.
 */
export async function cli() {
    // Drop the node binary and script path; the rest is the user's message.
    const cliArgs = process.argv.slice(2);
    if (cliArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    const thread = new Thread([{ type: "user_input", data: cliArgs.join(" ") }]);

    // First pass through the agent loop.
    let current = await agentLoop(thread);
    let lastEvent = current.events.slice(-1)[0];

    // Anything other than done_for_now needs a human before the agent continues.
    while (lastEvent.data.intent !== "done_for_now") {
        const humanEvent = await askHuman(lastEvent);
        thread.events.push(humanEvent);
        current = await agentLoop(thread);
        lastEvent = current.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}
/**
 * Routes a pending event to a human and resolves with the event to append
 * to the thread: via HumanLayer email when HUMANLAYER_API_KEY is set,
 * otherwise via a terminal prompt showing the event's `message`.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        // NOTE(review): the terminal path prompts with data.message only;
        // confirm what should happen for events that carry no message.
        return await askHumanCLI(lastEvent.data.message);
    }
}
/**
 * Prints `message` on stdout and resolves with a `human_response` event
 * holding the line the user types.
 *
 * A new readline interface is created per question and closed once the
 * answer arrives, so repeated prompts do not pile up interfaces on stdin.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });
    return new Promise<Event>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}
/**
 * Requests approval for a pending `divide` call by email through HumanLayer
 * and resolves with the `tool_response` event to append to the thread.
 *
 * When approved, the division is performed here and its result returned;
 * when denied, the event carries the reviewer's feedback instead.
 *
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not "divide"
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })
        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}\nwith feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/walkthrough/11b-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: sends the joined arguments to the agent, then
 * keeps handing the agent's last event to a human until the agent reports
 * `done_for_now`, prints the final message, and exits.
 */
export async function cli() {
    // Drop the node binary and script path; the rest is the user's message.
    const cliArgs = process.argv.slice(2);
    if (cliArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    const thread = new Thread([{ type: "user_input", data: cliArgs.join(" ") }]);

    // First pass through the agent loop.
    let current = await agentLoop(thread);
    let lastEvent = current.events.slice(-1)[0];

    // Anything other than done_for_now needs a human before the agent continues.
    while (lastEvent.data.intent !== "done_for_now") {
        const humanEvent = await askHuman(lastEvent);
        thread.events.push(humanEvent);
        current = await agentLoop(thread);
        lastEvent = current.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}
/**
 * Routes a pending event to a human and resolves with the event to append
 * to the thread: via HumanLayer email when HUMANLAYER_API_KEY is set,
 * otherwise via a terminal prompt showing the event's `message`.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        // NOTE(review): the terminal path prompts with data.message only;
        // confirm what should happen for events that carry no message.
        return await askHumanCLI(lastEvent.data.message);
    }
}
/**
 * Prints `message` on stdout and resolves with a `human_response` event
 * holding the line the user types.
 *
 * A new readline interface is created per question and closed once the
 * answer arrives, so repeated prompts do not pile up interfaces on stdin.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });
    return new Promise<Event>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}
/**
 * Contacts a human by email through HumanLayer and resolves with the
 * `tool_response` event to append to the thread.
 *
 * - `request_more_information`: sends the agent's question and returns the reply.
 * - `divide`: requests approval; when approved the division is performed here,
 *   when denied the event carries the reviewer's feedback instead.
 *
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is neither of the above
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })
    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        return {
            "type": "tool_response",
            "data": response
        }
    }
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })
        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}\nwith feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}
================================================
FILE: workshops/2025-05/sections/11-humanlayer-approval/walkthrough/11c-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: sends the joined arguments to the agent, then
 * keeps handing the agent's last event to a human until the agent reports
 * `done_for_now`, prints the final message, and exits.
 */
export async function cli() {
    // Drop the node binary and script path; the rest is the user's message.
    const cliArgs = process.argv.slice(2);
    if (cliArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    const thread = new Thread([{ type: "user_input", data: cliArgs.join(" ") }]);

    // First pass through the agent loop.
    let current = await agentLoop(thread);
    let lastEvent = current.events.slice(-1)[0];

    // Anything other than done_for_now needs a human before the agent continues.
    while (lastEvent.data.intent !== "done_for_now") {
        const humanEvent = await askHuman(lastEvent);
        thread.events.push(humanEvent);
        current = await agentLoop(thread);
        lastEvent = current.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}
/**
 * Routes a pending event to a human and resolves with the event to append
 * to the thread: via HumanLayer email when HUMANLAYER_API_KEY is set,
 * otherwise via a terminal prompt showing the event's `message`.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        // NOTE(review): the terminal path prompts with data.message only;
        // confirm what should happen for events that carry no message.
        return await askHumanCLI(lastEvent.data.message);
    }
}
/**
 * Prints `message` on stdout and resolves with a `human_response` event
 * holding the line the user types.
 *
 * A new readline interface is created per question and closed once the
 * answer arrives, so repeated prompts do not pile up interfaces on stdin.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });
    return new Promise<Event>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}
/**
 * Contacts a human by email through HumanLayer, using a custom Jinja email
 * body, and resolves with the `tool_response` event to append to the thread.
 *
 * - `request_more_information`: sends the agent's question and returns the reply.
 * - `divide`: requests approval; when approved the division is performed here,
 *   when denied the event carries the reviewer's feedback instead.
 *
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is neither of the above
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                // (continuation lines stay flush-left so the template text is unchanged)
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
reply to this email to approve
{% endif %}`
            }
        }
    })
    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        return {
            "type": "tool_response",
            "data": response
        }
    }
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })
        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}\nwith feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/README.md
================================================
# Chapter 12 - HumanLayer Webhook Integration
The previous sections used the humanlayer SDK in "synchronous mode" - that
means every time we wait for human approval, we sit in a loop
polling until the human response is received.
That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results.
add code to initialize humanlayer in the server
```diff
src/server.ts
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
+import { humanlayer } from 'humanlayer';
const app = express();
const store = new ThreadStore();
+const getHumanlayer = () => {
+ const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
+ if (!HUMANLAYER_EMAIL) {
+ throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+ }
+
+ const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
+ if (!HUMANLAYER_API_KEY) {
+ throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
+ }
+ return humanlayer({
+ runId: `12fa-agent`,
+ contactChannel: {
+ email: { address: HUMANLAYER_EMAIL }
+ }
+ });
+}
+
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
// loop until stop event
- const newThread = await agentLoop(thread);
+ const result = await agentLoop(thread);
- store.update(req.params.id, newThread);
+ store.update(req.params.id, result);
- lastEvent = newThread.events[newThread.events.length - 1];
+ lastEvent = result.events[result.events.length - 1];
lastEvent.data.response_url = `/thread/${req.params.id}/response`;
console.log("returning last event from endpoint", lastEvent);
- res.json(newThread);
+ res.json(result);
});
```
skip this step
cp ./walkthrough/12-1-server-init.ts src/server.ts
Next, let's update the /thread endpoint to
1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls
Update the server to be able to handle request_more_information responses
- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response
```diff
src/server.ts
-import express from 'express';
+import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
-import { humanlayer } from 'humanlayer';
+import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
});
}
-
// POST /thread - Start new thread
-app.post('/thread', async (req, res) => {
+app.post('/thread', async (req: Request, res: Response) => {
const thread = new Thread([{
type: "user_input",
}]);
- const threadId = store.create(thread);
- const newThread = await agentLoop(thread);
-
- store.update(threadId, newThread);
+ // run agent loop asynchronously, return immediately
+ Promise.resolve().then(async () => {
+ const threadId = store.create(thread);
+ const newThread = await agentLoop(thread);
+
+ store.update(threadId, newThread);
- const lastEvent = newThread.events[newThread.events.length - 1];
- // If we exited the loop, include the response URL so the client can
- // push a new message onto the thread
- lastEvent.data.response_url = `/thread/${threadId}/response`;
+ const lastEvent = newThread.events[newThread.events.length - 1];
- console.log("returning last event from endpoint", lastEvent);
-
- res.json({
- thread_id: threadId,
- ...newThread
+ if (thread.awaitingHumanResponse()) {
+ const hl = getHumanlayer();
+ // create a human contact - returns immediately
+ hl.createHumanContact({
+ spec: {
+ msg: lastEvent.data.message,
+ state: {
+ thread_id: threadId,
+ }
+ }
+ });
+ }
});
+
+ res.json({ status: "processing" });
});
// GET /thread/:id - Get thread status
-app.get('/thread/:id', (req, res) => {
+app.get('/thread/:id', (req: Request, res: Response) => {
const thread = store.get(req.params.id);
if (!thread) {
});
+type WebhookResponse = V1Beta2HumanContactCompleted;
-type ApprovalPayload = {
- type: "approval";
- approved: boolean;
- comment?: string;
-}
+const handleHumanResponse = async (req: Request, res: Response) => {
-type ResponsePayload = {
- type: "response";
- response: string;
}
-type Payload = ApprovalPayload | ResponsePayload;
+app.post('/webhook', async (req: Request, res: Response) => {
+ console.log("webhook response", req.body);
+ const response = req.body as WebhookResponse;
-// POST /thread/:id/response - Handle clarification response
-app.post('/thread/:id/response', async (req, res) => {
- let thread = store.get(req.params.id);
+ // response is guaranteed to be set on a webhook
+ const humanResponse: string = response.event.status?.response as string;
+
+ const threadId = response.event.spec.state?.thread_id;
+ if (!threadId) {
+ return res.status(400).json({ error: "Thread ID not found" });
+ }
+
+ const thread = store.get(threadId);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
- const body: Payload = req.body;
-
- let lastEvent = thread.events[thread.events.length - 1];
-
- if (thread.awaitingHumanResponse() && body.type === 'response') {
- thread.events.push({
- type: "human_response",
- data: body.response
- });
- } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
- // push feedback onto the thread
- thread.events.push({
- type: "tool_response",
- data: `user denied the operation with feedback: "${body.comment}"`
- });
- } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
- // approved, run the tool, pushing results onto the thread
- await handleNextStep(lastEvent.data, thread);
- } else {
- res.status(400).json({
- error: "Invalid request: " + body.type,
- awaitingHumanResponse: thread.awaitingHumanResponse(),
- awaitingHumanApproval: thread.awaitingHumanApproval()
- });
- return;
+ if (!thread.awaitingHumanResponse()) {
+ return res.status(400).json({ error: "Thread is not awaiting human response" });
}
-
- // loop until stop event
- const result = await agentLoop(thread);
-
- store.update(req.params.id, result);
-
- lastEvent = result.events[result.events.length - 1];
- lastEvent.data.response_url = `/thread/${req.params.id}/response`;
-
- console.log("returning last event from endpoint", lastEvent);
-
- res.json(result);
});
```
skip this step
cp ./walkthrough/12a-server.ts src/server.ts
Start the server in another terminal
npx tsx src/server.ts
now that the server is running, send a payload to the '/thread' endpoint
__ do the response step
__ now handle approvals for divide
__ now also handle done_for_now
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/agent.baml
================================================
// human tools are async requests to a human
// Union of the two steps that hand control back to a person.
type HumanTools = ClarificationRequest | DoneForNow
// Step that asks the user a question; `message` holds the question text.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Step that reports finished work; `message` holds the summary for the user.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Asks the model for the next step given the serialized thread text.
// The result is one of HumanTools or one of CalculatorTools.
// The client is given as the "openai/gpt-4o" shorthand string rather than
// one of the named clients.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// Smoke test: the thread contains only a greeting and no task.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
hello!
"#
}
// Passes only when the selected tool's intent is "request_more_information".
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Single-step arithmetic request: the thread asks for one multiplication.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4?
"#
}
// Passes only when the selected tool's intent is "multiply".
@@assert(intent, {{this.intent == "multiply"}})
}
// Multi-step request whose thread already lists the operands and result of
// each step (12, then 6, then 18), so no further calculation is required.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
a: 3
b: 4
12
a: 12
b: 2
6
a: 6
b: 12
18
"#
}
// The step must be "done_for_now" ...
@@assert(intent, {{this.intent == "done_for_now"}})
// ... and its message must contain the final value "18".
@@assert(answer, {{"18" in this.message}})
}
// The second operand in the request is not a number ("fe1iiaff10").
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and fe1iiaff10
"#
}
// Passes only when the selected tool's intent is "request_more_information".
@@assert(intent, {{this.intent == "request_more_information"}})
}
// The thread holds a garbled request, a clarification message, and the
// follow-up "lets try 12 instead". The expected step is multiply(3, 12).
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and FD*(#F&& ?
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
lets try 12 instead
"#
}
@@assert(intent, {{this.intent == "multiply"}})
// Each assert label names the field it checks, so a failure report points
// at the right operand (the labels were previously swapped).
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// OpenAI gpt-4o client with no retry policy; the key is read from OPENAI_API_KEY.
client CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
// OpenAI gpt-4o-mini client that uses the Exponential retry policy declared in this file.
client CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
// Anthropic Claude 3.5 Sonnet client with no retry policy; the key is read from ANTHROPIC_API_KEY.
client CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
// Anthropic Claude 3 Haiku client that uses the Constant retry policy declared in this file.
client CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client built from the two smaller models above.
client CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that tries gpt-4o-mini first and falls back to gpt-4o.
client OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds.
// The list previously named CustomGPT4oMini twice, which only repeated
// the same client instead of falling back to a different one.
strategy [CustomGPT4oMini, CustomGPT4o]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Fixed-delay policy: up to 3 retries, 200 ms between attempts.
// Referenced by CustomHaiku.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Backoff policy: up to 2 retries, starting at 300 ms, multiplied by 1.5
// each time and capped at 10000 ms. Referenced by CustomGPT4oMini.
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
// src/agent.ts imports the generated client as "../baml_client".
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/tool_calculator.baml
================================================
// Union of every calculator tool call defined in this file.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Tool call for addition of the operands `a` and `b`.
class AddTool {
// Literal discriminator that identifies this tool.
intent "add"
a int | float
b int | float
}
// Tool call for subtraction with the operands `a` and `b`.
class SubtractTool {
// Literal discriminator that identifies this tool.
intent "subtract"
a int | float
b int | float
}
// Tool call for multiplication of the operands `a` and `b`.
class MultiplyTool {
// Literal discriminator that identifies this tool.
intent "multiply"
a int | float
b int | float
}
// Tool call for division with the operands `a` and `b`.
class DivideTool {
// Literal discriminator that identifies this tool.
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/package.json
================================================
{
  "name": "my-agent",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "tsx src/index.ts",
    "build": "tsc"
  },
  "dependencies": {
    "@boundaryml/baml": "^0.85.0",
    "baml": "^0.0.0",
    "express": "^5.1.0",
    "humanlayer": "^0.7.7",
    "tsx": "^4.15.0",
    "typescript": "^5.0.0"
  },
  "devDependencies": {
    "@types/express": "^5.0.1",
    "@types/node": "^20.0.0",
    "@typescript-eslint/eslint-plugin": "^6.0.0",
    "@typescript-eslint/parser": "^6.0.0",
    "eslint": "^8.0.0",
    "supertest": "^7.1.0"
  }
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/**
 * One entry in a thread's history.
 */
export interface Event {
    /** Event category, e.g. "user_input", "tool_call", "tool_response" or "human_response". */
    type: string;
    /** Payload of the event: a plain value, or an object that may carry an `intent` field. */
    data: any;
}
/**
 * An ordered list of events, plus helpers to render the list for the model
 * and to inspect what the most recent event is waiting on.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Render every event as an XML-like block and join the blocks with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Remove leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Render one event as `<tag>...</tag>`.
     *
     * The tag is the payload's `intent` when present, otherwise the event type.
     * Object payloads are rendered as one `key: value` line per key (excluding
     * `intent`); any other payload is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null` is 'object', so null must be excluded before Object.keys().
        const body = (e.data === null || typeof e.data !== 'object')
            ? e.data
            : Object.keys(e.data)
                .filter(k => k !== 'intent')
                .map(k => `${k}: ${e.data[k]}`)
                .join("\n");
        // The closing tag needs its "</" prefix so each block is well-formed.
        return this.trimLeadingWhitespace(`\n<${tag}>\n${body}\n</${tag}>\n`);
    }

    /** True when the last event is a `request_more_information` or `done_for_now` call. */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        // Optional chaining keeps an empty thread or a null payload from throwing.
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /** True when the last event is a `divide` call. */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}
/** Any calculator tool call the agent can execute. */
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and append its result to the thread.
 *
 * @param nextStep the tool call to run; `intent` selects the operation applied to `a` and `b`
 * @param thread   the thread that receives a `tool_response` event with the numeric result
 * @returns the same thread instance, after the result has been pushed
 * @throws Error when `intent` is not one of the four calculator operations
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed callers; guards payloads that bypass the type system.
            throw new Error(`unknown tool: ${(nextStep as any).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Drive the agent until it reaches a step that needs a human.
 *
 * Each iteration asks the model for the next step, records it on the thread
 * as a `tool_call` event, and then either runs the tool or stops.
 *
 * @param thread the thread to continue; it is mutated in place
 * @returns the thread, whose last event is the `done_for_now`,
 *          `request_more_information` or `divide` call that stopped the loop
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: runs the agent on the message given as
 * arguments, asking a human whenever the agent stops on anything other than
 * `done_for_now`, then prints the final message and exits.
 */
export async function cli() {
    // Drop the node binary and script path; everything else is the message.
    const [, , ...words] = process.argv;
    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed the thread with the user's message as its first event.
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);
    const tailOf = (t: Thread): Event => t.events[t.events.length - 1];

    let pending = tailOf(await agentLoop(thread));
    while (pending.data.intent !== "done_for_now") {
        const reply = await askHuman(pending);
        thread.events.push(reply);
        pending = tailOf(await agentLoop(thread));
    }

    // print the final result
    // optional - you could loop here too
    console.log(pending.data.message);
    process.exit(0);
}
/**
 * Ask a human about the agent's last event.
 *
 * Uses email when HUMANLAYER_API_KEY is set, otherwise prompts on the
 * terminal with the event's `message`.
 *
 * @param lastEvent the event the agent stopped on
 * @returns the event to append to the thread
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}
/**
 * Prompt on the terminal and wrap the typed answer in a `human_response` event.
 *
 * @param message text shown to the user above the `> ` prompt
 * @returns a `human_response` event whose data is the line the user entered
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // A new interface is created on every call; close this one so
            // interfaces do not pile up on stdin across repeated prompts.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}
/**
 * Contact a human over email through HumanLayer and turn the reply into a
 * `tool_response` event.
 *
 * - `request_more_information`: sends the agent's message and returns the reply.
 * - `divide`: requests approval; on approval the division is performed here,
 *   otherwise the denial and the reviewer's comment are returned.
 *
 * @param lastEvent the event the agent stopped on
 * @returns the event to append to the thread
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not handled
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
reply to this email to approve
{% endif %}`
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        return {
            "type": "tool_response",
            "data": response
        }
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}
with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/index.ts
================================================
import { cli } from "./cli"
/** Prints a greeting; not called anywhere in this file. */
async function hello(): Promise<void> {
    console.log('hello, world!')
}

/** Program entry point: hands control to the CLI. */
async function main() {
    await cli()
}

// Log any rejection from main() to stderr.
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
// Express application that exposes the agent over HTTP.
const app = express();
// Parse JSON request bodies.
app.use(express.json());
// Pretty-print JSON responses with a 2-space indent.
app.set('json spaces', 2);
// Thread storage shared by every route handler below.
const store = new ThreadStore();
// POST /thread - Start new thread
// Body: { "message": string }. Runs the agent loop until it stops and returns
// the thread id together with the thread's events.
app.post('/thread', async (req, res) => {
    // Reject a missing body or message up front instead of feeding
    // `undefined` to the model.
    const message = req.body?.message;
    if (typeof message !== 'string' || message.length === 0) {
        res.status(400).json({ error: "Invalid request: 'message' must be a non-empty string" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);
    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});
// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // Send, then return nothing: Express 5 handler types expect a void
        // result, matching the 400 branch of the response endpoint.
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});
/** Body sent to approve or deny a pending operation. */
interface ApprovalPayload {
    type: "approval";
    approved: boolean;
    /** Optional feedback accompanying the decision. */
    comment?: string;
}

/** Body sent to answer a question from the agent. */
interface ResponsePayload {
    type: "response";
    response: string;
}

/** Either kind of body accepted by POST /thread/:id/response, discriminated by `type`. */
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
// Accepts either a { type: "response" } or a { type: "approval" } body,
// applies it to the thread, then resumes the agent loop.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    // The body may be absent when the client sends no JSON.
    const body: Payload | undefined = req.body;
    const pendingEvent = thread.events[thread.events.length - 1];
    // Capture both flags before the thread is mutated below.
    const awaitingResponse = thread.awaitingHumanResponse();
    const awaitingApproval = thread.awaitingHumanApproval();

    if (awaitingResponse && body?.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (awaitingApproval && body?.type === 'approval') {
        if (body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(pendingEvent.data, thread);
        } else {
            // push feedback onto the thread
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment}"`
            });
        }
    } else {
        res.status(400).json({
            error: "Invalid request: " + body?.type,
            awaitingHumanResponse: awaitingResponse,
            awaitingHumanApproval: awaitingApproval
        });
        return;
    }

    // loop until stop event
    const newThread = await agentLoop(thread);
    store.update(req.params.id, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(newThread);
});
// Listen on PORT from the environment, falling back to 3000.
// NOTE: this runs at module load, so importing this file also starts the listener.
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Make the configured app available to importers.
export { app };
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory thread storage keyed by a generated UUID.
 * Contents live only as long as the process does.
 */
export class ThreadStore {
    // Map needs explicit key/value types; the bare `Map` does not type-check.
    private threads: Map<string, Thread> = new Map();

    /** Store a new thread and return the id generated for it. */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }

    /** Look up a thread by id; `undefined` when the id is unknown. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Replace (or insert) the thread stored under `id`. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/walkthrough/12-1-server-init.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer } from 'humanlayer';
// Express application that exposes the agent over HTTP.
const app = express();
// Parse JSON request bodies.
app.use(express.json());
// Pretty-print JSON responses with a 2-space indent.
app.set('json spaces', 2);
// Thread storage shared by every route handler below.
const store = new ThreadStore();
/**
 * Build a HumanLayer client that contacts humans by email.
 *
 * Requires HUMANLAYER_EMAIL (the address to contact) and HUMANLAYER_API_KEY
 * to be set; the email variable is checked first.
 *
 * @throws Error naming the first missing environment variable
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL, HUMANLAYER_API_KEY } = process.env;

    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    // The key is only checked for presence here; it is not passed explicitly.
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const emailChannel = { email: { address: HUMANLAYER_EMAIL } };
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: emailChannel
    });
}
// POST /thread - Start new thread
// Body: { "message": string }. Runs the agent loop until it stops and returns
// the thread id together with the thread's events.
app.post('/thread', async (req, res) => {
    // Reject a missing body or message up front instead of feeding
    // `undefined` to the model.
    const message = req.body?.message;
    if (typeof message !== 'string' || message.length === 0) {
        res.status(400).json({ error: "Invalid request: 'message' must be a non-empty string" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);
    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});
// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // Send, then return nothing: Express 5 handler types expect a void
        // result, matching the 400 branch of the response endpoint.
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});
/** Body sent to approve or deny a pending operation. */
interface ApprovalPayload {
    type: "approval";
    approved: boolean;
    /** Optional feedback accompanying the decision. */
    comment?: string;
}

/** Body sent to answer a question from the agent. */
interface ResponsePayload {
    type: "response";
    response: string;
}

/** Either kind of body accepted by POST /thread/:id/response, discriminated by `type`. */
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
// Accepts either a { type: "response" } or a { type: "approval" } body,
// applies it to the thread, then resumes the agent loop.
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    // The body may be absent when the client sends no JSON.
    const body: Payload | undefined = req.body;
    const pendingEvent = thread.events[thread.events.length - 1];
    // Capture both flags before the thread is mutated below.
    const awaitingResponse = thread.awaitingHumanResponse();
    const awaitingApproval = thread.awaitingHumanApproval();

    if (awaitingResponse && body?.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (awaitingApproval && body?.type === 'approval') {
        if (body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(pendingEvent.data, thread);
        } else {
            // push feedback onto the thread
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment}"`
            });
        }
    } else {
        res.status(400).json({
            error: "Invalid request: " + body?.type,
            awaitingHumanResponse: awaitingResponse,
            awaitingHumanApproval: awaitingApproval
        });
        return;
    }

    // loop until stop event
    const result = await agentLoop(thread);
    store.update(req.params.id, result);

    const lastEvent = result.events[result.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(result);
});
// Listen on PORT from the environment, falling back to 3000.
// NOTE: this runs at module load, so importing this file also starts the listener.
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Make the configured app available to importers.
export { app };
================================================
FILE: workshops/2025-05/sections/12-humanlayer-webhook/walkthrough/12a-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
// Express application that exposes the agent over HTTP.
const app = express();
// Parse JSON request bodies.
app.use(express.json());
// Pretty-print JSON responses with a 2-space indent.
app.set('json spaces', 2);
// Thread storage shared by every route handler below.
const store = new ThreadStore();
/**
 * Build a HumanLayer client that contacts humans by email.
 *
 * Requires HUMANLAYER_EMAIL (the address to contact) and HUMANLAYER_API_KEY
 * to be set; the email variable is checked first.
 *
 * @throws Error naming the first missing environment variable
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL, HUMANLAYER_API_KEY } = process.env;

    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    // The key is only checked for presence here; it is not passed explicitly.
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const emailChannel = { email: { address: HUMANLAYER_EMAIL } };
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: emailChannel
    });
}
// POST /thread - Start new thread
// Registers the thread, replies immediately, and runs the agent loop in the
// background. When the loop stops on a question for a human, a HumanLayer
// contact carrying the thread id is created.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Create the id before replying so the caller can poll GET /thread/:id.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (thread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // Awaited so a failed request reaches the catch below instead of
            // becoming an unhandled rejection.
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // Nothing awaits this chain, so failures must be reported here.
        console.error(`background agent run failed for thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});
// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // Send, then return nothing: Express 5 handler types expect a void result.
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});
// Shape of the body this server casts incoming /webhook requests to.
type WebhookResponse = V1Beta2HumanContactCompleted;
// NOTE(review): empty placeholder; nothing in this file calls it.
const handleHumanResponse = async (req: Request, res: Response) => {
}
// POST /webhook - Receive a completed human contact
// Looks up the thread named in the contact's state, records the human's reply
// on it, acknowledges the webhook, and resumes the agent loop in the background.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        res.status(400).json({ error: "Thread ID not found" });
        return;
    }

    const thread = store.get(threadId);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    if (!thread.awaitingHumanResponse()) {
        res.status(400).json({ error: "Thread is not awaiting human response" });
        return;
    }

    // Record the reply so the model sees it on the next step.
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });
    store.update(threadId, thread);

    // Resume without blocking the webhook.
    // NOTE(review): if the loop stops on another question, no new human
    // contact is created here - confirm whether that should be added.
    agentLoop(thread)
        .then((newThread) => store.update(threadId, newThread))
        .catch((err) => {
            console.error(`background agent run failed for thread ${threadId}`, err);
        });

    // Always answer the webhook; without this the request never completes.
    res.json({ status: "ok" });
});
// Listen on PORT from the environment, falling back to 3000.
// NOTE: this runs at module load, so importing this file also starts the listener.
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Make the configured app available to importers.
export { app };
================================================
FILE: workshops/2025-05/sections/final/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/sections/final/README.md
================================================
# Chapter 0 - Hello World
Let's start with a basic TypeScript setup and a hello world program.
This guide is written in TypeScript (yes, a python version is coming soon)
There are many checkpoints between every file edit in the workshop steps,
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of nodejs and npm installed
You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine
brew install node@20
You should see the node version
node --version
Copy initial package.json
cp ./walkthrough/00-package.json package.json
Install dependencies
npm install
Copy tsconfig.json
cp ./walkthrough/00-tsconfig.json tsconfig.json
add .gitignore
cp ./walkthrough/00-.gitignore .gitignore
Create src folder
mkdir -p src
Add a simple hello world index.ts
cp ./walkthrough/00-index.ts src/index.ts
Run it to verify
npx tsx src/index.ts
You should see:
hello, world!
# Chapter 1 - CLI and Agent Loop
Now let's add BAML and create our first agent with a CLI interface.
First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.
npm install @boundaryml/baml
Initialize BAML
npx baml-cli init
Remove default resume.baml
rm baml_src/resume.baml
Add our starter agent, a single baml prompt that we'll build on
cp ./walkthrough/01-agent.baml baml_src/agent.baml
Generate BAML client code
npx baml-cli generate
Enable BAML logging for this section
export BAML_LOG=debug
Add the CLI interface
cp ./walkthrough/01-cli.ts src/cli.ts
Update index.ts to use the CLI
cp ./walkthrough/01-index.ts src/index.ts
Add the agent implementation
cp ./walkthrough/01-agent.ts src/agent.ts
The BAML code is configured to use OPENAI_API_KEY by default
As you're testing, you can change the model / provider to something else
as you please
client "openai/gpt-4o"
[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)
For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini)
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.
If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key.
export OPENAI_API_KEY=...
Try it out
npx tsx src/index.ts hello
you should see a familiar response from the model
{
intent: 'done_for_now',
message: 'Hello! How can I assist you today?'
}
# Chapter 2 - Add Calculator Tools
Let's add some calculator tools to our agent.
Let's start by adding a tool definition for the calculator
These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.
cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps
cp ./walkthrough/02-agent.baml baml_src/agent.baml
Generate updated BAML client
npx baml-cli generate
Try out the calculator
npx tsx src/index.ts 'can you add 3 and 4'
You should see a tool call to the calculator
{
intent: 'add',
a: 3,
b: 4
}
# Chapter 3 - Process Tool Calls in a Loop
Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.
First, lets update the agent to handle the tool call
cp ./walkthrough/03-agent.ts src/agent.ts
Now, lets try it out
npx tsx src/index.ts 'can you add 3 and 4'
you should see the agent call the tool and then return the result
{
intent: 'done_for_now',
message: 'The sum of 3 and 4 is 7.'
}
For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output
export BAML_LOG=off
Try a multi-step calculation
npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'
you'll notice that tools like multiply and divide are not available
npx tsx src/index.ts 'can you multiply 3 and 4'
next, let's add handlers for the rest of the calculator tools
cp ./walkthrough/03b-agent.ts src/agent.ts
Test subtraction
npx tsx src/index.ts 'can you subtract 3 from 4'
now, let's test the multiplication tool
npx tsx src/index.ts 'can you multiply 3 and 4'
finally, let's test a more complex calculation with multiple operations
npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
# Chapter 4 - Add Tests to agent.baml
Let's add some tests to our BAML agent.
to start, leave the baml logs enabled
export BAML_LOG=debug
next, let's add some tests to the agent
We'll start with a simple test that checks the agent's ability to handle
a basic calculation.
cp ./walkthrough/04-agent.baml baml_src/agent.baml
Run the tests
npx baml-cli test
now, let's improve the test with assertions!
Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.
cp ./walkthrough/04b-agent.baml baml_src/agent.baml
Run the tests
npx baml-cli test
as you add more tests, you can disable the logs to keep the output clean.
You may want to turn them on as you iterate on specific tests.
export BAML_LOG=off
now, let's add some more complex test cases,
where we resume from in the middle of an in-progress
agentic context window
cp ./walkthrough/04c-agent.baml baml_src/agent.baml
let's try to run it
npx baml-cli test
# Chapter 5 - Multiple Human Tools
In this section, we'll add support for multiple tools that serve to
contact humans.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
first, let's add a tool that can request clarification from a human
this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.
cp ./walkthrough/05-agent.baml baml_src/agent.baml
next, let's re-generate the client code
NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.
npx baml-cli generate
now, let's update the agent to use the new tool
cp ./walkthrough/05-agent.ts src/agent.ts
next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI
cp ./walkthrough/05-cli.ts src/cli.ts
let's try it out
npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '
next, let's add a test that checks the agent's ability to handle
a clarification request
cp ./walkthrough/05b-agent.baml baml_src/agent.baml
and now we can run the tests again
npx baml-cli test
you'll notice the new test passes, but the hello world test fails
This is because the agent's default behavior is to return "done_for_now"
cp ./walkthrough/05c-agent.baml baml_src/agent.baml
Verify tests pass
npx baml-cli test
# Chapter 6 - Customize Your Prompt with Reasoning
In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.
this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)
there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)
for this section, it will be helpful to leave the baml logs enabled
export BAML_LOG=debug
update the agent prompt to include a reasoning step
cp ./walkthrough/06-agent.baml baml_src/agent.baml
generate the updated client
npx baml-cli generate
now, you can try it out with a simple prompt
npx tsx src/index.ts 'can you multiply 3 and 4'
you should see output from the baml logs showing the reasoning steps
#### optional challenge
add a field to your tool output format that includes the reasoning steps in the output!
# Chapter 7 - Customize Your Context Window
In this section, we'll explore how to customize the context window
of the agent.
this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)
update the agent to pretty-print the Context window for the model
cp ./walkthrough/07-agent.ts src/agent.ts
Test the formatting
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
next, let's update the agent to use XML formatting instead
this is a very popular format for passing data to a model,
among other things, because of the token efficiency of XML.
cp ./walkthrough/07b-agent.ts src/agent.ts
let's try it out
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
lets update our tests to match the new output format
cp ./walkthrough/07c-agent.baml baml_src/agent.baml
check out the updated tests
npx baml-cli test
# Chapter 8 - Adding API Endpoints
Add an Express server to expose the agent via HTTP.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Install Express and types
npm install express && npm install --save-dev @types/express supertest
Add the server implementation
cp ./walkthrough/08-server.ts src/server.ts
Start the server
npx tsx src/server.ts
Test with curl (in another terminal)
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you add 3 and 4"}'
You should get an answer from the agent which includes the
agentic trace, ending in a message like:
{"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}
# Chapter 9 - In-Memory State and Async Clarification
Add state management and async clarification support.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Add some simple in-memory state management for threads
cp ./walkthrough/09-state.ts src/state.ts
update the server to use the state management
* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id
* implement POST /thread/:id/response
cp ./walkthrough/09-server.ts src/server.ts
Start the server
npx tsx src/server.ts
Test clarification flow
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you multiply 3 and xyz"}'
# Chapter 10 - Adding Human Approval
Add support for human approval of operations.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
update the server to handle human approvals
* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong
cp ./walkthrough/10-server.ts src/server.ts
Add a few methods to the agent to handle approvals and responses
cp ./walkthrough/10-agent.ts src/agent.ts
Start the server
npx tsx src/server.ts
Test division with approval
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you divide 3 by 4"}'
You should see:
{
"thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
}
]
}
reject the request with another curl call, changing the thread ID
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'
You should see: the last tool call is now `"intent":"divide","a":3,"b":5`
{
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
},
{
"type": "tool_response",
"data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 5,
"response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
}
}
]
}
now you can approve the operation
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": true}'
you should see the final message includes the tool response and final result!
...
{
"type": "tool_response",
"data": 0.6
},
{
"type": "done_for_now",
"message": "I divided 3 by 5 and the result is 0.6. If you have any more operations or queries, feel free to ask!",
"response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
}
# Chapter 11 - Human Approvals over email
in this section, we'll add support for human approvals over email.
This will start a little bit contrived, just to get the concepts down -
We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI
While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Install HumanLayer
npm install humanlayer
Update CLI to send `divide` and `request_more_information` to a human via email
cp ./walkthrough/11-cli.ts src/cli.ts
Run the CLI
npx tsx src/index.ts 'can you divide 4 by 5'
The last line of your program's output should mention the human review step
nextStep { intent: 'divide', a: 4, b: 5 }
HumanLayer: Requested human approval from HumanLayer cloud
go ahead and respond to the email with some feedback:

you should get another email with an updated attempt based on your feedback!
You can go ahead and approve this one:

and your final output will look like
nextStep {
intent: 'done_for_now',
message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
}
The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!
let's implement the `request_more_information` flow as well
cp ./walkthrough/11b-cli.ts src/cli.ts
let's test the `request_more_information` flow by asking for a calculation
with garbled input:
npx tsx src/index.ts 'can you multiply 4 and xyz'
You should get an email with a request for clarification
Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?
you can respond with something like
use 8 instead of xyz
you should see a final result on the CLI like
I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.
as a final step, let's explore using a custom html template for the email
cp ./walkthrough/11c-cli.ts src/cli.ts
first try with divide:
npx tsx src/index.ts 'can you divide 4 by 5'
you should see a slightly different email with the custom template

feel free to run with the flow and then you can try updating the template to your liking
(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)
try triggering "request_more_information" as well!
thats it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval
# Chapter 12 - HumanLayer Webhook Integration
the previous sections used the humanlayer SDK in "synchronous mode" - that
means every time we wait for human approval, we sit in a loop
polling until the human response is received.
That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results.
add code to initialize humanlayer in the server
cp ./walkthrough/12-1-server-init.ts src/server.ts
next, lets update the /thread endpoint to
1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls
Update the server to be able to handle `request_more_information` responses
- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response
cp ./walkthrough/12a-server.ts src/server.ts
Start the server in another terminal
npx tsx src/server.ts
now that the server is running, send a payload to the '/thread' endpoint
__ do the response step
__ now handle approvals for divide
__ now also handle done_for_now
================================================
FILE: workshops/2025-05/sections/final/baml_src/agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the two intents declared below.
type HumanTools = ClarificationRequest | DoneForNow
// Tool the model selects to ask for more information; `message` is a free-form string.
// The @description text on `intent` is part of the schema shown to the model.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Tool the model selects when it is finished for now; per the field description,
// `message` is what gets sent to the user about the work that was done.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Asks the model for the next step of a thread.
//   thread: the already-serialized thread, injected verbatim into the user message
//   returns: one of the human tools or one of the calculator tools
// The prompt sends a fixed system message, then the thread and the output schema
// (ctx.output_format), followed by a hint asking the model to list its thinking
// as bullet points before emitting the schema-shaped object.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// A bare greeting carries no task, so this test expects the model to pick
// the request_more_information intent.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
hello!
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// A single multiplication request should produce the multiply intent.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4?
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// The thread already contains three operand pairs and their results (12, 6, 18),
// so the model is expected to finish with done_for_now and mention 18.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
a: 3
b: 4
12
a: 12
b: 2
6
a: 6
b: 12
18
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// The second operand is not a number, so the model is expected to ask for
// clarification instead of calling a calculator tool.
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and fe1iiaff10
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// Once the user has supplied a usable operand ("lets try 12 instead"), the model
// is expected to go back to the multiply tool with a = 3 and b = 12.
// Each assert label names the field it actually checks, so a failure report
// points at the right operand.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
can you multiply 3 and FD*(#F&& ?
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
lets try 12 instead
"#
}
@@assert(intent, {{this.intent == "multiply"}})
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/sections/final/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// OpenAI client for the "gpt-4o" model; the API key is read from env.OPENAI_API_KEY.
client CustomGPT4o {
provider openai
options {
model "gpt-4o"
api_key env.OPENAI_API_KEY
}
}
// OpenAI client for "gpt-4o-mini"; uses the Exponential retry policy declared in this file.
client CustomGPT4oMini {
provider openai
retry_policy Exponential
options {
model "gpt-4o-mini"
api_key env.OPENAI_API_KEY
}
}
// Anthropic client for "claude-3-5-sonnet-20241022"; key read from env.ANTHROPIC_API_KEY.
client CustomSonnet {
provider anthropic
options {
model "claude-3-5-sonnet-20241022"
api_key env.ANTHROPIC_API_KEY
}
}
// Anthropic client for "claude-3-haiku-20240307"; uses the Constant retry policy declared in this file.
client CustomHaiku {
provider anthropic
retry_policy Constant
options {
model "claude-3-haiku-20240307"
api_key env.ANTHROPIC_API_KEY
}
}
// https://docs.boundaryml.com/docs/snippets/clients/round-robin
// Composite client that spreads calls across the two clients listed in `strategy`.
client CustomFast {
provider round-robin
options {
// This will alternate between the two clients
strategy [CustomGPT4oMini, CustomHaiku]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/fallback
// Composite client that walks the `strategy` list in order.
// NOTE(review): both entries are CustomGPT4oMini, so the "fallback" targets the
// same client as the first attempt - confirm whether a different second client
// (e.g. CustomGPT4o) was intended.
client OpenaiFallback {
provider fallback
options {
// This will try the clients in order until one succeeds
strategy [CustomGPT4oMini, CustomGPT4oMini]
}
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Retry policy: up to 3 retries with a constant 200 ms delay between attempts.
retry_policy Constant {
max_retries 3
// Strategy is optional
strategy {
type constant_delay
delay_ms 200
}
}
// Retry policy: up to 2 retries with exponential backoff, configured with a
// 300 ms delay, a 1.5 multiplier and a 10000 ms maximum delay.
retry_policy Exponential {
max_retries 2
// Strategy is optional
strategy {
type exponential_backoff
delay_ms 300
multiplier 1.5
max_delay_ms 10000
}
}
================================================
FILE: workshops/2025-05/sections/final/baml_src/generators.baml
================================================
// This helps us auto-generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator.
generator target {
// Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi"
output_type "typescript"
// Where the generated code will be saved (relative to baml_src/)
output_dir "../"
// The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).
// The BAML VSCode extension version should also match this version.
version "0.85.0"
// Valid values: "sync", "async"
// This controls what `b.FunctionName()` will be (sync or async).
default_client_mode async
}
================================================
FILE: workshops/2025-05/sections/final/baml_src/tool_calculator.baml
================================================
// Union of the four arithmetic tools. Each tool is identified by its literal
// `intent` and takes two numeric operands, `a` and `b` (int or float).
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// intent "add"
class AddTool {
intent "add"
a int | float
b int | float
}
// intent "subtract"
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// intent "multiply"
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// intent "divide"
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/sections/final/package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"baml": "^0.0.0",
"express": "^5.1.0",
"humanlayer": "^0.7.7",
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/express": "^5.0.1",
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0",
"supertest": "^7.1.0"
}
}
================================================
FILE: workshops/2025-05/sections/final/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
// One entry in a thread's history.
export interface Event {
// event kind; this file pushes "tool_call" and "tool_response" events
type: string
// payload; untyped - this file stores tool-call objects and numeric results here
data: any;
}
/**
 * An ordered list of events plus helpers to render it for the LLM and to
 * inspect what the most recent event is waiting on.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Render every event as a tagged block and join the blocks with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strip leading spaces/tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Render one event as
     *
     *   <tag>
     *   body
     *   </tag>
     *
     * where `tag` is the event's `data.intent` when present, else its `type`.
     * Object payloads are rendered as `key: value` lines (the `intent` key is
     * omitted because it is already the tag); anything else is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null === 'object', so guard it explicitly before Object.keys()
        const isPlainValue = typeof e.data !== 'object' || e.data === null;
        const body = isPlainValue
            ? e.data
            : Object.keys(e.data)
                .filter(k => k !== 'intent')
                .map(k => `${k}: ${e.data[k]}`)
                .join("\n");
        // the closing line must be a real closing tag (`</tag>`), not `tag>`
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `);
    }

    /** Intent of the most recent event, or undefined for an empty thread / intent-less payload. */
    private lastIntent(): string | undefined {
        return this.events[this.events.length - 1]?.data?.intent;
    }

    /** True when the last event is a message addressed to a human. */
    awaitingHumanResponse(): boolean {
        const intent = this.lastIntent();
        return intent === 'request_more_information' || intent === 'done_for_now';
    }

    /** True when the last event is a `divide` call, which is gated on approval. */
    awaitingHumanApproval(): boolean {
        return this.lastIntent() === 'divide';
    }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its result on the thread.
 *
 * @param nextStep the tool call selected by the model
 * @param thread   the thread to append the `tool_response` event to (mutated in place)
 * @returns the same thread instance, with the result appended
 * @throws Error when `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // fail loudly instead of resolving to undefined and breaking the caller's loop
            throw new Error(`unknown tool: ${(nextStep as { intent: string }).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Run the agent until it produces a step that needs a human.
 *
 * Every step chosen by the model is appended to the thread as a `tool_call`
 * event. add / subtract / multiply are executed immediately and the loop
 * continues; done_for_now, request_more_information and divide stop the loop
 * and hand the thread back to the caller.
 *
 * @param thread the thread to advance (mutated in place)
 * @returns the thread, whose last event is the step awaiting a human
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/sections/final/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: turns the process arguments into the first user
 * message, then alternates between the agent loop and a human until the agent
 * emits `done_for_now`, whose message is printed before exiting.
 */
export async function cli() {
    // everything after `node <script>` is the user's message
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    const message = words.join(" ");

    // seed the thread with the user's message
    const thread = new Thread([{ type: "user_input", data: message }]);

    let current = await agentLoop(thread);
    let tail = current.events[current.events.length - 1];

    // keep consulting the human until the agent says it is done
    for (; tail.data.intent !== "done_for_now"; tail = current.events[current.events.length - 1]) {
        const reply = await askHuman(tail);
        thread.events.push(reply);
        current = await agentLoop(thread);
    }

    // print the final result
    // optional - you could loop here too
    console.log(tail.data.message);
    process.exit(0);
}
/**
 * Route a pending event to a human: over email when HUMANLAYER_API_KEY is set,
 * otherwise via a prompt on the terminal.
 *
 * @param lastEvent the event the agent stopped on
 * @returns the event to append to the thread
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    return process.env.HUMANLAYER_API_KEY
        ? await askHumanEmail(lastEvent)
        : await askHumanCLI(lastEvent.data.message);
}
/**
 * Print `message` on the terminal and wait for one line of input.
 *
 * @param message text shown above the `> ` prompt
 * @returns a `human_response` event carrying the typed answer
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<Event>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // release stdin; cli() calls this once per turn and would otherwise
            // stack a new interface on top of the previous one each time
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}
/**
 * Send the pending event to a human over email via HumanLayer and block until
 * they reply.
 *
 * - `request_more_information`: the message is emailed and the reply is
 *   returned as a `tool_response` event.
 * - `divide`: an approval request is emailed; on approval the division is
 *   performed here and its result returned, otherwise the denial (with the
 *   human's comment) is returned so the model can react to it.
 *
 * @param lastEvent the event the agent stopped on
 * @returns the `tool_response` event to append to the thread
 * @throws Error when HUMANLAYER_EMAIL is unset, or when the intent is neither of the above
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}
reply to this email to approve
{% endif %}`
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        return {
            "type": "tool_response",
            "data": response
        }
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}
with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}
================================================
FILE: workshops/2025-05/sections/final/src/index.ts
================================================
import { cli } from "./cli"
// Leftover greeting helper; main() below does not call it.
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Program entry point: delegate to the CLI.
async function main() {
    await cli()
}

// surface any rejection from main() on stderr
main().catch(console.error)
================================================
FILE: workshops/2025-05/sections/final/src/server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
// Express application; request bodies are parsed as JSON and JSON responses
// are pretty-printed with 2-space indentation.
const app = express();
app.use(express.json());
app.set('json spaces', 2);
// thread storage used by the route handlers in this file
const store = new ThreadStore();
/**
 * Build a HumanLayer client that contacts the address in HUMANLAYER_EMAIL.
 * Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is missing from the
 * environment (checked in that order).
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL, HUMANLAYER_API_KEY } = process.env;

    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }

    const contactChannel = {
        email: { address: HUMANLAYER_EMAIL }
    };
    return humanlayer({ runId: `12fa-agent`, contactChannel });
}
// POST /thread - Start new thread
//
// Body: { "message": string }
// Responds immediately with { status: "processing", thread_id } and runs the
// agent loop in the background. The id is created before responding so the
// caller can poll GET /thread/:id. When the loop stops on a message for a
// human, a HumanLayer contact is created carrying the thread id in its state.
app.post('/thread', async (req: Request, res: Response) => {
    const message = req.body?.message;
    if (typeof message !== 'string' || message.length === 0) {
        res.status(400).json({ error: "message is required" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        if (newThread.awaitingHumanResponse()) {
            const lastEvent = newThread.events[newThread.events.length - 1];
            const hl = getHumanlayer();
            // awaited so that a failure lands in the catch below
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the HTTP response has already been sent; log rather than leave an unhandled rejection
        console.error(`error processing thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});
// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // send, then return nothing: Express 5 handler types expect void, not a Response
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});
// Shape of the webhook payload: the body is cast to this type in the /webhook handler.
type WebhookResponse = V1Beta2HumanContactCompleted;
// NOTE(review): empty placeholder - it has no body and nothing in this file calls it.
const handleHumanResponse = async (req: Request, res: Response) => {
}
// POST /webhook - receive a completed human contact
//
// Looks up the thread named in the contact's state, appends the human's reply
// as a `human_response` event, acknowledges the webhook, and then resumes the
// agent loop. If the loop stops on another message for a human, a new contact
// is created. Approval of `divide` calls is not handled here.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response?.event?.status?.response as string;

    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        res.status(400).json({ error: "Thread ID not found" });
        return;
    }

    const thread = store.get(threadId);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    if (!thread.awaitingHumanResponse()) {
        res.status(400).json({ error: "Thread is not awaiting human response" });
        return;
    }

    thread.events.push({
        type: "human_response",
        data: humanResponse
    });

    // acknowledge right away so the sender is not left waiting on the LLM
    res.json({ status: "ok" });

    try {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        if (newThread.awaitingHumanResponse()) {
            const lastEvent = newThread.events[newThread.events.length - 1];
            await getHumanlayer().createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    } catch (err) {
        // the response has already been sent, so the failure can only be logged
        console.error(`error resuming thread ${threadId}`, err);
    }
});
// Listen on $PORT, falling back to 3000, and log once the server is up.
const port = process.env.PORT || 3000;
const announce = () => {
    console.log(`Server running on port ${port}`);
};
app.listen(port, announce);

// exported so other modules can import the configured app
export { app };
================================================
FILE: workshops/2025-05/sections/final/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory thread storage keyed by a random UUID.
 */
export class ThreadStore {
    private threads: Map<string, Thread> = new Map<string, Thread>();

    /** Store `thread` under a freshly generated id and return that id. */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }

    /** Return the thread stored under `id`, or undefined when there is none. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Replace (or insert) the thread stored under `id`. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
================================================
FILE: workshops/2025-05/sections/final/tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/walkthrough/00-.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/walkthrough/00-index.ts
================================================
// Print the greeting.
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Program entry point.
async function main() {
    await hello()
}

// surface any rejection from main() on stderr
main().catch(console.error)
================================================
FILE: workshops/2025-05/walkthrough/00-package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05/walkthrough/00-tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05/walkthrough/01-agent.baml
================================================
// The only output shape at this step: the literal intent "done_for_now" plus a free-form message.
class DoneForNow {
intent "done_for_now"
message string
}
// Asks the model for the next step of a thread.
//   thread: the serialized thread, injected verbatim into the user message
//   returns: a DoneForNow object
// The prompt is a fixed system message followed by the thread and the output
// schema (ctx.output_format).
function DetermineNextStep(
thread: string
) -> DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Smoke test: runs DetermineNextStep on a single user_input event; no assertions.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
================================================
FILE: workshops/2025-05/walkthrough/01-agent.ts
================================================
import { b } from "../baml_client";
// tool call or a respond to human tool
// (whatever b.DetermineNextStep resolves to)
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
// One entry in a thread's history.
export interface Event {
// event kind label
type: string
// payload; untyped
data: any;
}
/** Holds the ordered list of events that make up one conversation. */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Render the whole event list as a JSON string for the prompt. */
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const { events } = this;
        return JSON.stringify(events);
    }
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
/**
 * @param thread the thread to send to the model
 * @returns the step object the model selected
 */
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const serialized = thread.serializeForLLM();
    return await b.DetermineNextStep(serialized);
}
================================================
FILE: workshops/2025-05/walkthrough/01-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
/**
 * Command-line entry point: joins the process arguments into one message,
 * runs the agent on it and prints whatever the agent returns.
 */
export async function cli() {
    // everything after `node <script>` is the user's message
    const [, , ...words] = process.argv;

    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the thread starts with a single user_input event
    const firstEvent = { type: "user_input", data: words.join(" ") };
    const thread = new Thread([firstEvent]);

    const outcome = await agentLoop(thread);
    console.log(outcome);
}
================================================
FILE: workshops/2025-05/walkthrough/01-index.ts
================================================
import { cli } from "./cli"
// Leftover greeting helper; main() below does not call it.
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Program entry point: delegate to the CLI.
async function main() {
    await cli()
}

// surface any rejection from main() on stderr
main().catch(console.error)
================================================
FILE: workshops/2025-05/walkthrough/02-agent.baml
================================================
// Output shape for "nothing more to do": literal intent plus a free-form message.
class DoneForNow {
intent "done_for_now"
message string
}
// Asks the model for the next step of a thread.
//   thread: the serialized thread, injected verbatim into the user message
//   returns: one of the calculator tools, or DoneForNow
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Smoke test: runs DetermineNextStep on a single user_input event; no assertions.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
================================================
FILE: workshops/2025-05/walkthrough/02-tool_calculator.baml
================================================
// Union of the four arithmetic tools. Each tool is identified by its literal
// `intent` and takes two numeric operands, `a` and `b` (int or float).
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// intent "add"
class AddTool {
intent "add"
a int | float
b int | float
}
// intent "subtract"
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
// intent "multiply"
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
// intent "divide"
class DivideTool {
intent "divide"
a int | float
b int | float
}
================================================
FILE: workshops/2025-05/walkthrough/03-agent.ts
================================================
import { b } from "../baml_client";
// tool call or a respond to human tool
// (whatever b.DetermineNextStep resolves to)
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
// One entry in a thread's history.
export interface Event {
// event kind; this file pushes "tool_call" and "tool_response" events
type: string
// payload; untyped
data: any;
}
/** Holds the ordered list of events that make up one conversation. */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Render the whole event list as a JSON string for the prompt. */
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const history = this.events;
        return JSON.stringify(history);
    }
}
/**
 * Run the agent until it reports that it is done.
 *
 * `add` steps are executed inline: the call and its result are both appended
 * to the thread and the loop continues. `done_for_now` ends the loop.
 *
 * @param thread the thread to advance (mutated in place)
 * @returns the message of the final `done_for_now` step
 * @throws Error for any intent other than `add` / `done_for_now`
 */
export async function agentLoop(thread: Thread): Promise<string> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        switch (nextStep.intent) {
            case "done_for_now":
                // response to human, return the next step object
                return nextStep.message;
            case "add": {
                // braces scope `result` to this case instead of the whole switch
                thread.events.push({
                    "type": "tool_call",
                    "data": nextStep
                });
                const result = nextStep.a + nextStep.b;
                console.log("tool_response", result);
                thread.events.push({
                    "type": "tool_response",
                    "data": result
                });
                continue;
            }
            default:
                throw new Error(`Unknown intent: ${nextStep.intent}`);
        }
    }
}
================================================
FILE: workshops/2025-05/walkthrough/03b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
// One entry in a thread's history.
export interface Event {
// event kind; this file pushes "tool_call" and "tool_response" events
type: string
// payload; untyped
data: any;
}
/** Holds the ordered list of events that make up one conversation. */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Render the whole event list as a JSON string for the prompt. */
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const asJson = JSON.stringify(this.events);
        return asJson;
    }
}
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute one calculator tool call and record its result on the thread.
 *
 * @param nextStep the tool call selected by the model
 * @param thread   the thread to append the `tool_response` event to (mutated in place)
 * @returns the same thread instance, with the result appended
 * @throws Error when `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // fail loudly instead of resolving to undefined and breaking the caller's loop
            throw new Error(`unknown tool: ${(nextStep as { intent: string }).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Run the agent until it reports that it is done.
 *
 * Every step chosen by the model is appended to the thread as a `tool_call`
 * event; calculator steps are executed via handleNextStep and the loop
 * continues, while `done_for_now` ends the loop.
 *
 * @param thread the thread to advance (mutated in place)
 * @returns the message of the final `done_for_now` step
 */
export async function agentLoop(thread: Thread): Promise<string> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
                // response to human, return the next step object
                return nextStep.message;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/walkthrough/04-agent.baml
================================================
// Output shape for "nothing more to do": literal intent plus a free-form message.
class DoneForNow {
intent "done_for_now"
message string
}
// Asks the model for the next step of a thread.
//   thread: the serialized thread, injected verbatim into the user message
//   returns: one of the calculator tools, or DoneForNow
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Smoke test: a bare greeting; no assertions.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
// Smoke test: a single multiplication request; no assertions.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
}
================================================
FILE: workshops/2025-05/walkthrough/04b-agent.baml
================================================
// Output shape for "nothing more to do": literal intent plus a free-form message.
class DoneForNow {
intent "done_for_now"
message string
}
// Asks the model for the next step of a thread.
//   thread: the serialized thread, injected verbatim into the user message
//   returns: one of the calculator tools, or DoneForNow
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// A bare greeting should end the turn: asserts the done_for_now intent
// (the assertion is labelled "hello").
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(hello, {{this.intent == "done_for_now"}})
}
// A multiplication request should select the multiply tool
// (the assertion is labelled "math_operation").
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(math_operation, {{this.intent == "multiply"}})
}
================================================
FILE: workshops/2025-05/walkthrough/04c-agent.baml
================================================
// Output shape for "nothing more to do": literal intent plus a free-form message.
class DoneForNow {
intent "done_for_now"
message string
}
// Asks the model for the next step of a thread.
//   thread: the serialized thread, injected verbatim into the user message
//   returns: one of the calculator tools, or DoneForNow
function DetermineNextStep(
thread: string
) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// A bare greeting should end the turn: asserts the done_for_now intent.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
// A multiplication request should select the multiply tool.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// The thread already holds all three tool calls and their results (12, 6, 18),
// so the model is expected to finish with done_for_now and mention 18.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
================================================
FILE: workshops/2025-05/walkthrough/05-agent.baml
================================================
// human tools are async requests to a human
// HumanTools is the union of the two intents declared below.
type HumanTools = ClarificationRequest | DoneForNow
// Tool the model selects to ask for more information; `message` is a free-form string.
// The @description text on `intent` is part of the schema shown to the model.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Tool the model selects when it is finished for now; per the field description,
// `message` is what gets sent to the user about the work that was done.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Sends the serialized thread to the "openai/gpt-4o" client and parses the
// reply as a HumanTools variant or a CalculatorTools value (declared
// elsewhere). Prompt layout: a fixed system message, then a user turn with
// the thread, the question, and the schema rendered by ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// A lone greeting event should be answered with the done_for_now intent.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
// A single multiplication request should produce a multiply tool call.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Thread in which multiply, divide and add have already been called and
// answered (last tool_response is 18): the model should stop with
// done_for_now and mention 18 in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
================================================
FILE: workshops/2025-05/walkthrough/05-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's event log. */
export interface Event {
// kind of entry; this file pushes "tool_call" and "tool_response"
type: string
// payload whose shape depends on the entry (left untyped as `any`)
data: any;
}
/**
 * Ordered list of events for one conversation, plus the serialization that is
 * handed to the LLM.
 */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Encodes the event list as compact JSON. Swap this out for any custom
     * format (XML, etc.), e.g.
     * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM() {
        const payload = JSON.stringify(this.events);
        return payload;
    }
}
// Union of the four calculator tool-call shapes imported from the BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
/**
 * Runs one calculator tool call and appends its numeric result to the thread
 * as a "tool_response" event.
 *
 * @param nextStep tool call to execute; `intent` selects the operation and
 *                 `a` / `b` are its operands
 * @param thread   thread to append the result to (mutated in place)
 * @returns the same `thread` instance, after the result has been pushed
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // division by zero is not special-cased; JS yields Infinity or NaN
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed callers; fail loudly instead of
            // resolving to undefined when an untyped value slips through.
            throw new Error(`unknown calculator intent: ${(nextStep as any).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Drives the agent until it needs a human: repeatedly asks the LLM for the
 * next step, records it on the thread as a "tool_call" event, and executes
 * calculator tools inline.
 *
 * @param thread conversation state; mutated in place
 * @returns the thread once the model answers with "done_for_now" or
 *          "request_more_information" (that tool call is the last event)
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);
        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });
        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/walkthrough/05-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: turns the CLI arguments into the first user
 * message, runs the agent loop, and keeps prompting on stdin for as long as
 * the agent asks for more information. Prints the final message and exits.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script path
    const userArgs = process.argv.slice(2);
    if (userArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // all arguments together form the initial user_input event
    const initialEvent = { type: "user_input", data: userArgs.join(" ") };
    const thread = new Thread([initialEvent]);

    let finished = await agentLoop(thread);
    let lastEvent = finished.events[finished.events.length - 1];
    while (lastEvent.data.intent === "request_more_information") {
        const reply = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: reply });
        finished = await agentLoop(thread);
        lastEvent = finished.events[finished.events.length - 1];
    }

    // print the final result (optional - you could loop here too)
    console.log(lastEvent.data.message);
    process.exit(0);
}
/**
 * Prints `message` followed by a "> " prompt on stdout and resolves with the
 * line the user types on stdin.
 *
 * @param message question to show to the user
 * @returns the user's answer
 */
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });
    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Close the interface so repeated questions do not leave extra
            // listeners attached to stdin.
            rl.close();
            resolve(answer);
        });
    });
}
================================================
FILE: workshops/2025-05/walkthrough/05b-agent.baml
================================================
// Human tools are asynchronous requests to a human: either a request for
// clarification or a final done message.
type HumanTools = ClarificationRequest | DoneForNow
// Variant tagged with the literal intent "request_more_information"; the
// @description text annotates the `intent` field. `message` is free-form text.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant tagged with the literal intent "done_for_now"; `message` is
// described to the model as the text to send to the user about the work done.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Sends the serialized thread to the "openai/gpt-4o" client and parses the
// reply as a HumanTools variant or a CalculatorTools value (declared
// elsewhere). Prompt layout: a fixed system message, then a user turn with
// the thread, the question, and the schema rendered by ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// A lone greeting event should be answered with the done_for_now intent.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
}
// A single multiplication request should produce a multiply tool call.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Thread in which multiply, divide and add have already been called and
// answered (last tool_response is 18): the model should stop with
// done_for_now and mention 18 in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// A multiplication request whose second operand is not a number should make
// the model ask for clarification (request_more_information).
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// After a garbled request, the agent's clarification question, and the
// human's answer ("lets try 12 instead"), the model should call multiply
// with a = 3 and b = 12.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
[
{"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
{"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
{"type":"human_response","data":"lets try 12 instead"}
]
"#
}
@@assert(intent, {{this.intent == "multiply"}})
// each assertion is labelled with the field it checks
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/walkthrough/05c-agent.baml
================================================
// Human tools are asynchronous requests to a human: either a request for
// clarification or a final done message.
type HumanTools = ClarificationRequest | DoneForNow
// Variant tagged with the literal intent "request_more_information"; the
// @description text annotates the `intent` field. `message` is free-form text.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant tagged with the literal intent "done_for_now"; `message` is
// described to the model as the text to send to the user about the work done.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Sends the serialized thread to the "openai/gpt-4o" client and parses the
// reply as a HumanTools variant or a CalculatorTools value (declared
// elsewhere). Prompt layout: a fixed system message, then a user turn with
// the thread, the question, and the schema rendered by ctx.output_format.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// A lone greeting event is expected to yield the request_more_information
// intent in this step.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// A single multiplication request should produce a multiply tool call.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Thread in which multiply, divide and add have already been called and
// answered (last tool_response is 18): the model should stop with
// done_for_now and mention 18 in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// A multiplication request whose second operand is not a number should make
// the model ask for clarification (request_more_information).
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// After a garbled request, the agent's clarification question, and the
// human's answer ("lets try 12 instead"), the model should call multiply
// with a = 3 and b = 12.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
[
{"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
{"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
{"type":"human_response","data":"lets try 12 instead"}
]
"#
}
@@assert(intent, {{this.intent == "multiply"}})
// each assertion is labelled with the field it checks
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/walkthrough/06-agent.baml
================================================
// Human tools are asynchronous requests to a human: either a request for
// clarification or a final done message.
type HumanTools = ClarificationRequest | DoneForNow
// Variant tagged with the literal intent "request_more_information"; the
// @description text annotates the `intent` field. `message` is free-form text.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant tagged with the literal intent "done_for_now"; `message` is
// described to the model as the text to send to the user about the work done.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Sends the serialized thread to the "openai/gpt-4o" client and parses the
// reply as a HumanTools variant or a CalculatorTools value (declared
// elsewhere). Prompt layout: a fixed system message, then a user turn with
// the thread, the question, and the schema rendered by ctx.output_format.
// After the schema, the prompt additionally tells the model to write out a
// bullet-list plan before the structured answer.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
First, always plan out what to do next, for example:
- ...
- ...
- ...
{...} // schema
"#
}
// A lone greeting event is expected to yield the request_more_information
// intent in this step.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// A single multiplication request should produce a multiply tool call.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "can you multiply 3 and 4?"
}
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Thread in which multiply, divide and add have already been called and
// answered (last tool_response is 18): the model should stop with
// done_for_now and mention 18 in its message.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
[
{
"type": "user_input",
"data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
{
"type": "tool_call",
"data": {
"intent": "multiply",
"a": 3,
"b": 4
}
},
{
"type": "tool_response",
"data": 12
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 12,
"b": 2
}
},
{
"type": "tool_response",
"data": 6
},
{
"type": "tool_call",
"data": {
"intent": "add",
"a": 6,
"b": 12
}
},
{
"type": "tool_response",
"data": 18
}
]
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// A multiplication request whose second operand is not a number should make
// the model ask for clarification (request_more_information).
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
[{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// After a garbled request, the agent's clarification question, and the
// human's answer ("lets try 12 instead"), the model should call multiply
// with a = 3 and b = 12.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
[
{"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
{"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
{"type":"human_response","data":"lets try 12 instead"}
]
"#
}
@@assert(intent, {{this.intent == "multiply"}})
// each assertion is labelled with the field it checks
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/walkthrough/07-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's event log. */
export interface Event {
// kind of entry; this file pushes "tool_call" and "tool_response"
type: string
// payload whose shape depends on the entry (left untyped as `any`)
data: any;
}
/**
 * Ordered list of events for one conversation, plus the serialization that is
 * handed to the LLM.
 */
export class Thread {
    events: Event[];

    constructor(events: Event[]) {
        this.events = events;
    }

    /**
     * Encodes the event list as JSON indented by two spaces. Swap this out for
     * any custom format (XML, etc.), e.g.
     * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
     */
    serializeForLLM() {
        const indent = 2;
        const payload = JSON.stringify(this.events, null, indent);
        return payload;
    }
}
// Union of the four calculator tool-call shapes imported from the BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
/**
 * Runs one calculator tool call and appends its numeric result to the thread
 * as a "tool_response" event.
 *
 * @param nextStep tool call to execute; `intent` selects the operation and
 *                 `a` / `b` are its operands
 * @param thread   thread to append the result to (mutated in place)
 * @returns the same `thread` instance, after the result has been pushed
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // division by zero is not special-cased; JS yields Infinity or NaN
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed callers; fail loudly instead of
            // resolving to undefined when an untyped value slips through.
            throw new Error(`unknown calculator intent: ${(nextStep as any).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Drives the agent until it needs a human: repeatedly asks the LLM for the
 * next step, records it on the thread as a "tool_call" event, and executes
 * calculator tools inline.
 *
 * @param thread conversation state; mutated in place
 * @returns the thread once the model answers with "done_for_now" or
 *          "request_more_information" (that tool call is the last event)
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);
        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });
        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/walkthrough/07b-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's event log. */
export interface Event {
// kind of entry; this file pushes "tool_call" and "tool_response"
type: string
// payload whose shape depends on the entry (left untyped as `any`)
data: any;
}
/**
 * Ordered event log for one conversation, plus its tag-delimited rendering
 * for the LLM prompt.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins them with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Removes leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as a tagged block:
     *
     *   <tag>
     *   body
     *   </tag>
     *
     * The tag is `data.intent` when present, otherwise the event type. For
     * object payloads the body lists every key except `intent` as
     * `key: value`, one per line; any other payload is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so null must be excluded before Object.keys
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }
}
// Union of the four calculator tool-call shapes imported from the BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
/**
 * Runs one calculator tool call and appends its numeric result to the thread
 * as a "tool_response" event.
 *
 * @param nextStep tool call to execute; `intent` selects the operation and
 *                 `a` / `b` are its operands
 * @param thread   thread to append the result to (mutated in place)
 * @returns the same `thread` instance, after the result has been pushed
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // division by zero is not special-cased; JS yields Infinity or NaN
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed callers; fail loudly instead of
            // resolving to undefined when an untyped value slips through.
            throw new Error(`unknown calculator intent: ${(nextStep as any).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Drives the agent until it needs a human: repeatedly asks the LLM for the
 * next step, records it on the thread as a "tool_call" event, and executes
 * calculator tools inline.
 *
 * @param thread conversation state; mutated in place
 * @returns the thread once the model answers with "done_for_now" or
 *          "request_more_information" (that tool call is the last event)
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);
        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });
        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/walkthrough/07c-agent.baml
================================================
// Human tools are asynchronous requests to a human: either a request for
// clarification or a final done message.
type HumanTools = ClarificationRequest | DoneForNow
// Variant tagged with the literal intent "request_more_information"; the
// @description text annotates the `intent` field. `message` is free-form text.
class ClarificationRequest {
intent "request_more_information" @description("you can request more information from me")
message string
}
// Variant tagged with the literal intent "done_for_now"; `message` is
// described to the model as the text to send to the user about the work done.
class DoneForNow {
intent "done_for_now"
message string @description(#"
message to send to the user about the work that was done.
"#)
}
// Sends the serialized thread to the "openai/gpt-4o" client and parses the
// reply as a HumanTools variant or a CalculatorTools value (declared
// elsewhere). Prompt layout: a fixed system message, then a user turn with
// the thread, the question, and the schema rendered by ctx.output_format.
// After the schema, the prompt additionally tells the model to think in a
// bullet list before the structured answer.
function DetermineNextStep(
thread: string
) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
Always think about what to do next first, like:
- ...
- ...
- ...
{...} // schema
"#
}
// A lone greeting, rendered as a tagged user_input event, is expected to
// yield the request_more_information intent.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
<user_input>
hello!
</user_input>
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// A single multiplication request, rendered as a tagged user_input event,
// should produce a multiply tool call.
test MathOperation {
functions [DetermineNextStep]
args {
thread #"
<user_input>
can you multiply 3 and 4?
</user_input>
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
// Thread in which multiply, divide and add have already been called and
// answered (last tool_response is 18): the model should stop with
// done_for_now and mention 18 in its message. Each event is wrapped in a
// tag named after its intent or type so calls and results are distinguishable.
test LongMath {
functions [DetermineNextStep]
args {
thread #"
<user_input>
can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
</user_input>
<multiply>
a: 3
b: 4
</multiply>
<tool_response>
12
</tool_response>
<divide>
a: 12
b: 2
</divide>
<tool_response>
6
</tool_response>
<add>
a: 6
b: 12
</add>
<tool_response>
18
</tool_response>
"#
}
@@assert(intent, {{this.intent == "done_for_now"}})
@@assert(answer, {{"18" in this.message}})
}
// A multiplication request whose second operand is not a number should make
// the model ask for clarification (request_more_information).
test MathOperationWithClarification {
functions [DetermineNextStep]
args {
thread #"
<user_input>
can you multiply 3 and fe1iiaff10
</user_input>
"#
}
@@assert(intent, {{this.intent == "request_more_information"}})
}
// After a garbled request, the agent's clarification question, and the
// human's answer ("lets try 12 instead"), the model should call multiply
// with a = 3 and b = 12. Events are wrapped in tags named after their
// intent or type so the three turns are distinguishable.
test MathOperationPostClarification {
functions [DetermineNextStep]
args {
thread #"
<user_input>
can you multiply 3 and FD*(#F&& ?
</user_input>
<request_more_information>
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
</request_more_information>
<human_response>
lets try 12 instead
</human_response>
"#
}
@@assert(intent, {{this.intent == "multiply"}})
// each assertion is labelled with the field it checks
@@assert(a, {{this.a == 3}})
@@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/walkthrough/08-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
// POST /thread - Start new thread
// Body: { "message": string }. Runs the agent loop on a fresh thread and
// returns the resulting thread as JSON.
app.post('/thread', async (req, res, next) => {
    const message = req.body?.message;
    if (typeof message !== "string" || message.trim() === "") {
        res.status(400).json({ error: "Request body must include a non-empty string `message`" });
        return;
    }
    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);
        const result = await agentLoop(thread);
        res.json(result);
    } catch (err) {
        // forward async failures to Express instead of leaving the
        // rejection unhandled and the request hanging
        next(err);
    }
});
// GET /thread/:id - Get thread status
// Placeholder: no thread state is kept in this step, so every lookup answers
// 404 with a "Not implemented yet" error body.
app.get('/thread/:id', (req, res) => {
// optional - add state
res.status(404).json({ error: "Not implemented yet" });
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };
================================================
FILE: workshops/2025-05/walkthrough/09-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
// POST /thread - Start new thread
// Body: { "message": string }. Stores a fresh thread, runs the agent loop,
// and returns the thread together with its id.
app.post('/thread', async (req, res, next) => {
    const message = req.body?.message;
    if (typeof message !== "string" || message.trim() === "") {
        res.status(400).json({ error: "Request body must include a non-empty string `message`" });
        return;
    }
    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);
        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);
        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;
        console.log("returning last event from endpoint", lastEvent);
        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // forward async failures to Express instead of leaving the
        // rejection unhandled and the request hanging
        next(err);
    }
});
// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});
// POST /thread/:id/response - Handle clarification response
// Body: { "message": string }. Appends the human's reply to the stored
// thread, resumes the agent loop, and returns the updated thread.
app.post('/thread/:id/response', async (req, res, next) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    const message = req.body?.message;
    if (typeof message !== "string" || message.trim() === "") {
        res.status(400).json({ error: "Request body must include a non-empty string `message`" });
        return;
    }
    try {
        thread.events.push({
            type: "human_response",
            data: message
        });
        // loop until stop event
        const newThread = await agentLoop(thread);
        store.update(req.params.id, newThread);
        const lastEvent = newThread.events[newThread.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;
        console.log("returning last event from endpoint", lastEvent);
        res.json(newThread);
    } catch (err) {
        // forward async failures to Express instead of leaving the
        // rejection unhandled and the request hanging
        next(err);
    }
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };
================================================
FILE: workshops/2025-05/walkthrough/09-state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory thread registry keyed by a randomly generated UUID.
 */
export class ThreadStore {
    private threads: Map<string, Thread> = new Map();

    /** Stores `thread` under a fresh UUID and returns that id. */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }

    /** Returns the thread stored under `id`, or undefined when there is none. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Replaces (or inserts) the thread stored under `id`. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
================================================
FILE: workshops/2025-05/walkthrough/10-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
/** One entry in a thread's event log. */
export interface Event {
// kind of entry; this file pushes "tool_call" and "tool_response"
type: string
// payload whose shape depends on the entry (left untyped as `any`)
data: any;
}
/**
 * Ordered event log for one conversation, its tag-delimited rendering for
 * the LLM prompt, and helpers that report what kind of human input the last
 * event is waiting for.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins them with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Removes leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as a tagged block:
     *
     *   <tag>
     *   body
     *   </tag>
     *
     * The tag is `data.intent` when present, otherwise the event type. For
     * object payloads the body lists every key except `intent` as
     * `key: value`, one per line; any other payload is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so null must be excluded before Object.keys
        const isRecord = typeof e.data === 'object' && e.data !== null;
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /**
     * True when the last event's intent is "request_more_information" or
     * "done_for_now". False for an empty thread or a payload without intent.
     */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /**
     * True when the last event's intent is "divide". False for an empty
     * thread or a payload without intent.
     */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}
// Union of the four calculator tool-call shapes imported from the BAML client.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
/**
 * Runs one calculator tool call and appends its numeric result to the thread
 * as a "tool_response" event.
 *
 * @param nextStep tool call to execute; `intent` selects the operation and
 *                 `a` / `b` are its operands
 * @param thread   thread to append the result to (mutated in place)
 * @returns the same `thread` instance, after the result has been pushed
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // division by zero is not special-cased; JS yields Infinity or NaN
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Unreachable for well-typed callers; fail loudly instead of
            // resolving to undefined when an untyped value slips through.
            throw new Error(`unknown calculator intent: ${(nextStep as any).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Drives the agent until it needs a human: repeatedly asks the LLM for the
 * next step, records it on the thread as a "tool_call" event, and executes
 * add / subtract / multiply inline.
 *
 * @param thread conversation state; mutated in place
 * @returns the thread once the model answers with "done_for_now",
 *          "request_more_information", or a "divide" call that is handed
 *          back unexecuted for approval (that tool call is the last event)
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);
        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });
        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
================================================
FILE: workshops/2025-05/walkthrough/10-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
// POST /thread - Start new thread
// Body: { "message": string }. Stores a fresh thread, runs the agent loop,
// and returns the thread together with its id.
app.post('/thread', async (req, res, next) => {
    const message = req.body?.message;
    if (typeof message !== "string" || message.trim() === "") {
        res.status(400).json({ error: "Request body must include a non-empty string `message`" });
        return;
    }
    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);
        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);
        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;
        console.log("returning last event from endpoint", lastEvent);
        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // forward async failures to Express instead of leaving the
        // rejection unhandled and the request hanging
        next(err);
    }
});
// GET /thread/:id - Get thread status
// Responds with the stored thread, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
        return;
    }
    return res.status(404).json({ error: "Thread not found" });
});
// Body of an approval decision; `comment` is optional feedback text.
type ApprovalPayload = {
type: "approval";
approved: boolean;
comment?: string;
}
// Body of a free-text reply from the human.
type ResponsePayload = {
type: "response";
response: string;
}
// Request body accepted by POST /thread/:id/response, discriminated by `type`.
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text reply ({ type: "response" }) or an approval
// decision ({ type: "approval" }) for the stored thread, applies it, resumes
// the agent loop, and returns the updated thread. Answers 400 when the
// payload type does not match what the thread is currently waiting for.
app.post('/thread/:id/response', async (req, res, next) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    // req.body may be undefined when no JSON body was parsed
    const body: Payload = req.body ?? {};
    try {
        let lastEvent = thread.events[thread.events.length - 1];
        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; an absent comment renders as ""
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment ?? ""}"`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }
        // loop until stop event
        const newThread = await agentLoop(thread);
        store.update(req.params.id, newThread);
        lastEvent = newThread.events[newThread.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;
        console.log("returning last event from endpoint", lastEvent);
        res.json(newThread);
    } catch (err) {
        // forward async failures to Express instead of leaving the
        // rejection unhandled and the request hanging
        next(err);
    }
});
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
export { app };
================================================
FILE: workshops/2025-05/walkthrough/11-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: turns the CLI arguments into the first user
 * message, runs the agent loop, and keeps collecting human input until the
 * agent reports "done_for_now". Prints the final message and exits.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script path
    const userArgs = process.argv.slice(2);
    if (userArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // all arguments together form the initial user_input event
    const initialEvent = { type: "user_input", data: userArgs.join(" ") };
    const thread = new Thread([initialEvent]);

    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events[newThread.events.length - 1];
    while (lastEvent.data.intent !== "done_for_now") {
        const humanEvent = await askHuman(lastEvent);
        thread.events.push(humanEvent);
        newThread = await agentLoop(thread);
        lastEvent = newThread.events[newThread.events.length - 1];
    }

    // print the final result (optional - you could loop here too)
    console.log(lastEvent.data.message);
    process.exit(0);
}
/**
 * Obtains the human's reaction to the agent's last event: via HumanLayer
 * email when HUMANLAYER_API_KEY is set, otherwise via a stdin prompt.
 *
 * @param lastEvent the event the agent stopped on
 * @returns the event to append to the thread
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        // NOTE(review): only events carrying a `message` give a meaningful
        // prompt here; other intents pass undefined - confirm intended.
        return await askHumanCLI(lastEvent.data.message);
    }
}
/**
 * Prints `message` followed by a "> " prompt on stdout and resolves with the
 * typed line wrapped in a "human_response" event.
 *
 * @param message question to show to the user
 * @returns a `{ type: "human_response", data: <answer> }` event
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });
    return new Promise<Event>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Close the interface so repeated questions do not leave extra
            // listeners attached to stdin.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}
/**
 * Asks a human via HumanLayer email to approve the agent's pending "divide"
 * call and converts the decision into a "tool_response" event.
 *
 * @param lastEvent the event the agent stopped on; only intent "divide" is
 *                  handled
 * @returns the division result when approved, otherwise a denial message
 *          that includes the reviewer's comment
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not "divide"
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })
    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })
        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}\nwith feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}
================================================
FILE: workshops/2025-05/walkthrough/11b-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: turns the CLI arguments into the first user
 * message, runs the agent loop, and keeps collecting human input until the
 * agent reports "done_for_now". Prints the final message and exits.
 */
export async function cli() {
    // argv[0] is the node binary and argv[1] the script path
    const userArgs = process.argv.slice(2);
    if (userArgs.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // all arguments together form the initial user_input event
    const initialEvent = { type: "user_input", data: userArgs.join(" ") };
    const thread = new Thread([initialEvent]);

    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events[newThread.events.length - 1];
    while (lastEvent.data.intent !== "done_for_now") {
        const humanEvent = await askHuman(lastEvent);
        thread.events.push(humanEvent);
        newThread = await agentLoop(thread);
        lastEvent = newThread.events[newThread.events.length - 1];
    }

    // print the final result (optional - you could loop here too)
    console.log(lastEvent.data.message);
    process.exit(0);
}
/**
 * Ask a human to respond to the agent's last event.
 *
 * Uses the HumanLayer email flow when HUMANLAYER_API_KEY is set in the
 * environment; otherwise prompts on the terminal with the event's message.
 *
 * @param lastEvent the most recent event produced by the agent loop
 * @returns the event to append to the thread
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    }
    return await askHumanCLI(lastEvent.data.message);
}
/**
 * Prompt on stdin/stdout and wrap the typed answer in a "human_response" event.
 *
 * @param message text shown to the user above the "> " prompt
 * @returns a promise resolving to `{ type: "human_response", data: <answer> }`
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });
    return new Promise<Event>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Close the interface once we have the answer. cli() calls this in a
            // loop, and leaving each interface open would stack several readers
            // on the same stdin.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}
/**
 * Contact a human over email via HumanLayer and turn the reply into a thread event.
 *
 * - intent "request_more_information": sends `lastEvent.data.message` and
 *   returns the reply.
 * - intent "divide": requests approval for divide(a, b). On approval the
 *   division is performed here and the result returned; otherwise a denial
 *   message that includes the reviewer's comment is returned.
 *
 * Both HumanLayer calls are awaited, so this function does not return until
 * the human has replied.
 *
 * @param lastEvent most recent event on the thread; `data.intent` selects the flow
 * @returns a "tool_response" event carrying the reply, the result, or the denial text
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not handled here
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    const address = process.env.HUMANLAYER_EMAIL;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: { address }
        }
    });

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        });
        return {
            "type": "tool_response",
            "data": response
        };
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        });
        if (!response.approved) {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}\nwith feedback: ${response.comment}`
            };
        }
        const result = lastEvent.data.a / lastEvent.data.b;
        console.log("tool_response", result);
        return {
            "type": "tool_response",
            "data": result
        };
    }

    throw new Error(`unknown tool: ${lastEvent.data.intent}`);
}
================================================
FILE: workshops/2025-05/walkthrough/11c-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
/**
 * Command-line entry point: turns the CLI arguments into the first user
 * message, runs the agent loop, and keeps asking a human for input until the
 * agent's last event has intent "done_for_now". Prints that event's message
 * and exits the process.
 */
export async function cli() {
    // argv[0] is node and argv[1] is the script; everything after is the message
    const [, , ...words] = process.argv;
    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed a thread with the joined arguments as the initial user_input event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    let current = await agentLoop(thread);
    let tail = current.events[current.events.length - 1];
    while (tail.data.intent !== "done_for_now") {
        // the agent stopped on something that needs a human; get the reply
        // and feed it back into the loop
        const reply = await askHuman(tail);
        thread.events.push(reply);
        current = await agentLoop(thread);
        tail = current.events[current.events.length - 1];
    }

    // print the final result
    // optional - you could loop here too
    console.log(tail.data.message);
    process.exit(0);
}
/**
 * Ask a human to respond to the agent's last event.
 *
 * Uses the HumanLayer email flow when HUMANLAYER_API_KEY is set in the
 * environment; otherwise prompts on the terminal with the event's message.
 *
 * @param lastEvent the most recent event produced by the agent loop
 * @returns the event to append to the thread
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    }
    return await askHumanCLI(lastEvent.data.message);
}
/**
 * Prompt on stdin/stdout and wrap the typed answer in a "human_response" event.
 *
 * @param message text shown to the user above the "> " prompt
 * @returns a promise resolving to `{ type: "human_response", data: <answer> }`
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });
    return new Promise<Event>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // Close the interface once we have the answer. cli() calls this in a
            // loop, and leaving each interface open would stack several readers
            // on the same stdin.
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}
/**
 * Contact a human over email via HumanLayer (with a custom email body
 * template) and turn the reply into a thread event.
 *
 * - intent "request_more_information": sends `lastEvent.data.message` and
 *   returns the reply.
 * - intent "divide": requests approval for divide(a, b). On approval the
 *   division is performed here and the result returned; otherwise a denial
 *   message that includes the reviewer's comment is returned.
 *
 * Both HumanLayer calls are awaited, so this function does not return until
 * the human has replied.
 *
 * @param lastEvent most recent event on the thread; `data.intent` selects the flow
 * @returns a "tool_response" event carrying the reply, the result, or the denial text
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not handled here
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    const address = process.env.HUMANLAYER_EMAIL;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    // custom email body - jinja
    // Built line by line so source indentation cannot leak into the template.
    const template = [
        "{% if type == 'request_more_information' %}",
        "{{ event.spec.msg }}",
        "{% else %}",
        "agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}",
        "with args: {{event.spec.kwargs}}",
        "reply to this email to approve",
        "{% endif %}",
    ].join("\n");

    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: { address, template }
        }
    });

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        });
        return {
            "type": "tool_response",
            "data": response
        };
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        });
        if (!response.approved) {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent}\nwith feedback: ${response.comment}`
            };
        }
        const result = lastEvent.data.a / lastEvent.data.b;
        console.log("tool_response", result);
        return {
            "type": "tool_response",
            "data": result
        };
    }

    throw new Error(`unknown tool: ${lastEvent.data.intent}`);
}
================================================
FILE: workshops/2025-05/walkthrough/12-1-server-init.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
/**
 * Build a HumanLayer client that contacts the address in HUMANLAYER_EMAIL.
 *
 * Throws if HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is missing from the
 * environment (the email is checked first). The API key is only validated
 * here; it is not passed to the client explicitly.
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: address, HUMANLAYER_API_KEY: apiKey } = process.env;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: { email: { address } }
    });
}
// POST /thread - Start new thread
// Creates a thread from `req.body.message`, runs the agent loop to its next
// stopping point, and returns the whole thread plus its id. Any failure in the
// loop is reported as a 500 rather than leaving the request hanging on an
// unhandled promise rejection.
app.post('/thread', async (req, res) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);
        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;
        console.log("returning last event from endpoint", lastEvent);
        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        console.error("error handling POST /thread", err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Internal server error" });
        }
    }
});
// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
    } else {
        return res.status(404).json({ error: "Thread not found" });
    }
});
// Request body used to approve or deny a pending operation.
type ApprovalPayload = {
type: "approval";
approved: boolean;
// optional free-text feedback from the reviewer
comment?: string;
}
// Request body used to answer a question the agent asked the human.
type ResponsePayload = {
type: "response";
response: string;
}
// Body accepted by POST /thread/:id/response; the `type` field tells the two shapes apart.
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text answer ({type: "response"}) or an approval
// decision ({type: "approval"}), applies it to the thread if the thread is
// waiting for that kind of input, then resumes the agent loop.
//   404 - unknown thread id
//   400 - payload type does not match what the thread is waiting for
//   500 - the tool or agent loop failed
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;
    try {
        // captured before anything is pushed: this is the step awaiting input
        let lastEvent = thread.events[thread.events.length - 1];

        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; `comment` is optional, so avoid
            // rendering the literal text "undefined" to the model
            const feedback = body.comment ?? "";
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${feedback}"`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const result = await agentLoop(thread);
        store.update(req.params.id, result);
        lastEvent = result.events[result.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;
        console.log("returning last event from endpoint", lastEvent);
        res.json(result);
    } catch (err) {
        console.error(`error handling response for thread ${req.params.id}`, err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Internal server error" });
        }
    }
});
// Listen on the PORT environment variable, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Start the HTTP server and log the port once it is listening.
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Export the configured Express app for other modules to import.
export { app };
================================================
FILE: workshops/2025-05/walkthrough/12-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
/**
 * Build a HumanLayer client that contacts the address in HUMANLAYER_EMAIL.
 *
 * Throws if HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is missing from the
 * environment (the email is checked first). The API key is only validated
 * here; it is not passed to the client explicitly.
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: address, HUMANLAYER_API_KEY: apiKey } = process.env;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: { email: { address } }
    });
}
// POST /thread - Start new thread
// Registers the thread, replies immediately with {status, thread_id}, and runs
// the agent loop in the background. If the loop stops while waiting for a
// human answer, a HumanLayer contact carrying the thread id in its state is
// created.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);
    // Create the id up front so it can be returned to the caller; without it
    // the client has no way to look the thread up via GET /thread/:id.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);
        const lastEvent = newThread.events[newThread.events.length - 1];
        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact; awaited so a failed request reaches the
            // catch below (this waits for the request, not for the human's reply
            // - NOTE(review): confirm against the SDK)
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the HTTP response has already been sent, so all we can do is log
        console.error(`background agent loop failed for thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});
// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
    } else {
        return res.status(404).json({ error: "Thread not found" });
    }
});
// Type the /webhook handler casts its request body to (alias of the HumanLayer SDK type).
type WebhookResponse = V1Beta2HumanContactCompleted;
// Placeholder: the body is empty and nothing in this file calls it.
const handleHumanResponse = async (req: Request, res: Response) => {
}
// POST /webhook - receive a completed human contact
// Validates that the payload points at a known thread that is waiting for a
// human response, then acknowledges the delivery.
//   400 - no thread id in the payload, or thread is not awaiting a response
//   404 - unknown thread id
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;
    // The original code treats status.response as always present on a webhook.
    // Optional chaining keeps a malformed body from throwing inside this async
    // handler. The value is not consumed by this handler yet.
    const humanResponse: string = response?.event?.status?.response as string;
    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }
    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }
    // Always finish the request; without a reply the sender waits until it times out.
    res.json({ status: "ok" });
});
// Listen on the PORT environment variable, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Start the HTTP server and log the port once it is listening.
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Export the configured Express app for other modules to import.
export { app };
================================================
FILE: workshops/2025-05/walkthrough/12a-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
/**
 * Build a HumanLayer client that contacts the address in HUMANLAYER_EMAIL.
 *
 * Throws if HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is missing from the
 * environment (the email is checked first). The API key is only validated
 * here; it is not passed to the client explicitly.
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: address, HUMANLAYER_API_KEY: apiKey } = process.env;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: { email: { address } }
    });
}
// POST /thread - Start new thread
// Registers the thread, replies immediately with {status, thread_id}, and runs
// the agent loop in the background. If the loop stops while waiting for a
// human answer, a HumanLayer contact carrying the thread id in its state is
// created.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);
    // Create the id up front so it can be returned to the caller; without it
    // the client has no way to look the thread up via GET /thread/:id.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);
        const lastEvent = newThread.events[newThread.events.length - 1];
        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact; awaited so a failed request reaches the
            // catch below (this waits for the request, not for the human's reply
            // - NOTE(review): confirm against the SDK)
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the HTTP response has already been sent, so all we can do is log
        console.error(`background agent loop failed for thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});
// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
    } else {
        return res.status(404).json({ error: "Thread not found" });
    }
});
// Type the /webhook handler casts its request body to (alias of the HumanLayer SDK type).
type WebhookResponse = V1Beta2HumanContactCompleted;
// Placeholder: the body is empty and nothing in this file calls it.
const handleHumanResponse = async (req: Request, res: Response) => {
}
// POST /webhook - receive a completed human contact
// Validates that the payload points at a known thread that is waiting for a
// human response, then acknowledges the delivery.
//   400 - no thread id in the payload, or thread is not awaiting a response
//   404 - unknown thread id
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;
    // The original code treats status.response as always present on a webhook.
    // Optional chaining keeps a malformed body from throwing inside this async
    // handler. The value is not consumed by this handler yet.
    const humanResponse: string = response?.event?.status?.response as string;
    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }
    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }
    // Always finish the request; without a reply the sender waits until it times out.
    res.json({ status: "ok" });
});
// Listen on the PORT environment variable, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Start the HTTP server and log the port once it is listening.
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Export the configured Express app for other modules to import.
export { app };
================================================
FILE: workshops/2025-05/walkthrough/12aa-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
/**
 * Build a HumanLayer client that contacts the address in HUMANLAYER_EMAIL.
 *
 * Throws if HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is missing from the
 * environment (the email is checked first). The API key is only validated
 * here; it is not passed to the client explicitly.
 */
const getHumanlayer = () => {
    const { HUMANLAYER_EMAIL: address, HUMANLAYER_API_KEY: apiKey } = process.env;
    if (!address) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    if (!apiKey) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: { email: { address } }
    });
}
// POST /thread - Start new thread
// Registers the thread, replies immediately with {status, thread_id}, and runs
// the agent loop in the background. If the loop stops while waiting for a
// human answer, a HumanLayer contact carrying the thread id in its state is
// created.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);
    // Create the id up front so it can be returned to the caller; without it
    // the client has no way to look the thread up via GET /thread/:id.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);
        const lastEvent = newThread.events[newThread.events.length - 1];
        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact; awaited so a failed request reaches the
            // catch below (this waits for the request, not for the human's reply
            // - NOTE(review): confirm against the SDK)
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the HTTP response has already been sent, so all we can do is log
        console.error(`background agent loop failed for thread ${threadId}`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});
// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req: Request, res: Response) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
    } else {
        return res.status(404).json({ error: "Thread not found" });
    }
});
// Type the /webhook handler casts its request body to (alias of the HumanLayer SDK type).
type WebhookResponse = V1Beta2HumanContactCompleted;
// Placeholder: the body is empty and nothing in this file calls it.
const handleHumanResponse = async (req: Request, res: Response) => {
}
// POST /webhook - receive a completed human contact
// Validates that the payload points at a known thread that is waiting for a
// human response, then acknowledges the delivery.
//   400 - no thread id in the payload, or thread is not awaiting a response
//   404 - unknown thread id
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;
    // The original code treats status.response as always present on a webhook.
    // Optional chaining keeps a malformed body from throwing inside this async
    // handler. The value is not consumed by this handler yet.
    const humanResponse: string = response?.event?.status?.response as string;
    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }
    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }
    // Always finish the request; without a reply the sender waits until it times out.
    res.json({ status: "ok" });
});
// Listen on the PORT environment variable, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Start the HTTP server and log the port once it is listening.
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Export the configured Express app for other modules to import.
export { app };
================================================
FILE: workshops/2025-05/walkthrough/12b-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { V1Beta2EmailEventReceived, V1Beta2FunctionCallCompleted, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
app.use(express.json());
app.set('json spaces', 2);
const store = new ThreadStore();
// POST /thread - Start new thread
// Creates a thread from `req.body.message`, runs the agent loop to its next
// stopping point, and returns the whole thread plus its id. Any failure in the
// loop is reported as a 500 rather than leaving the request hanging on an
// unhandled promise rejection.
app.post('/thread', async (req, res) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);
        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);
        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;
        console.log("returning last event from endpoint", lastEvent);
        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        console.error("error handling POST /thread", err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Internal server error" });
        }
    }
});
// GET /thread/:id - Get thread status
// Responds with the stored thread as JSON, or 404 when the id is unknown.
app.get('/thread/:id', (req, res) => {
    const found = store.get(req.params.id);
    if (found) {
        res.json(found);
    } else {
        return res.status(404).json({ error: "Thread not found" });
    }
});
// Request body used to approve or deny a pending operation.
type ApprovalPayload = {
type: "approval";
approved: boolean;
// optional free-text feedback from the reviewer
comment?: string;
}
// Request body used to answer a question the agent asked the human.
type ResponsePayload = {
type: "response";
response: string;
}
// Body accepted by POST /thread/:id/response; the `type` field tells the two shapes apart.
type Payload = ApprovalPayload | ResponsePayload;
// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text answer ({type: "response"}) or an approval
// decision ({type: "approval"}), applies it to the thread if the thread is
// waiting for that kind of input, then resumes the agent loop.
//   404 - unknown thread id
//   400 - payload type does not match what the thread is waiting for
//   500 - the tool or agent loop failed
app.post('/thread/:id/response', async (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;
    try {
        // captured before anything is pushed: this is the step awaiting input
        let lastEvent = thread.events[thread.events.length - 1];

        if (thread.awaitingHumanResponse() && body.type === 'response') {
            thread.events.push({
                type: "human_response",
                data: body.response
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
            // push feedback onto the thread; `comment` is optional, so avoid
            // rendering the literal text "undefined" to the model
            const feedback = body.comment ?? "";
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${feedback}"`
            });
        } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(lastEvent.data, thread);
        } else {
            res.status(400).json({
                error: "Invalid request: " + body.type,
                awaitingHumanResponse: thread.awaitingHumanResponse(),
                awaitingHumanApproval: thread.awaitingHumanApproval()
            });
            return;
        }

        // loop until stop event
        const result = await agentLoop(thread);
        store.update(req.params.id, result);
        lastEvent = result.events[result.events.length - 1];
        lastEvent.data.response_url = `/thread/${req.params.id}/response`;
        console.log("returning last event from endpoint", lastEvent);
        res.json(result);
    } catch (err) {
        console.error(`error handling response for thread ${req.params.id}`, err);
        if (!res.headersSent) {
            res.status(500).json({ error: "Internal server error" });
        }
    }
});
// Type the webhook handler casts its request body to (alias of the HumanLayer SDK type).
type WebhookResponse = V1Beta2HumanContactCompleted;

// POST /webhook/response - receive a completed human contact
// Looks up the thread named in the payload's state, appends the human's reply
// as a "human_response" event, and acknowledges the delivery.
//   400 - no thread id in the payload, or thread is not awaiting a response
//   404 - unknown thread id
app.post('/webhook/response', async (req, res) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;
    // The original code treats status.response as always present on a webhook.
    // Optional chaining keeps a malformed body from throwing inside this async
    // handler.
    const humanResponse: string = response?.event?.status?.response as string;
    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }
    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });
    // Always finish the request; without a reply the sender waits until it times out.
    res.json({ status: "ok" });
});
// Listen on the PORT environment variable, falling back to 3000 when it is unset or empty.
const port = process.env.PORT || 3000;
// Start the HTTP server and log the port once it is listening.
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// Export the configured Express app for other modules to import.
export { app };
================================================
FILE: workshops/2025-05/walkthrough.md
================================================
# Building the 12-factor agent template from scratch
Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology.
## Cleanup
Make sure you're starting from a clean slate
Clean up existing files
rm -rf baml_src/ && rm -rf src/
## Chapter 0 - Hello World
Let's start with a basic TypeScript setup and a hello world program.
This guide is written in TypeScript (yes, a python version is coming soon)
There are many checkpoints between each file edit in the workshop steps,
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of nodejs and npm installed
You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine
brew install node@20
You should see the node version
node --version
Copy initial package.json
cp ./walkthrough/00-package.json package.json
show file
```json
// ./walkthrough/00-package.json
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
```
Install dependencies
npm install
Copy tsconfig.json
cp ./walkthrough/00-tsconfig.json tsconfig.json
show file
```json
// ./walkthrough/00-tsconfig.json
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
```
add .gitignore
cp ./walkthrough/00-.gitignore .gitignore
show file
```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```
Create src folder
Add a simple hello world index.ts
cp ./walkthrough/00-index.ts src/index.ts
show file
```ts
// ./walkthrough/00-index.ts
// Prints the greeting; async so main() can await it like later steps.
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Program entry point.
async function main() {
    await hello()
}

// Log any rejection instead of leaving it unhandled.
main().catch(console.error)
```
Run it to verify
npx tsx src/index.ts
You should see:
hello, world!
## Chapter 1 - CLI and Agent Loop
Now let's add BAML and create our first agent with a CLI interface.
First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.
npm install @boundaryml/baml
Initialize BAML
npx baml-cli init
Remove default resume.baml
rm baml_src/resume.baml
Add our starter agent, a single baml prompt that we'll build on
cp ./walkthrough/01-agent.baml baml_src/agent.baml
show file
```rust
// ./walkthrough/01-agent.baml
// Structured output for "nothing more to do right now": a fixed intent tag
// plus the message to show the human.
class DoneForNow {
intent "done_for_now"
message string
}
// Single prompt that drives the agent: takes the serialized thread as a string
// and asks the model for the next step. At this stage the only possible
// result is DoneForNow.
function DetermineNextStep(
thread: string
) -> DoneForNow {
client "openai/gpt-4o"
prompt #"
{{ _.role("system") }}
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Test case: runs DetermineNextStep against a thread holding a single
// "hello!" user_input event.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
```
Generate BAML client code
npx baml-cli generate
Enable BAML logging for this section
export BAML_LOG=debug
Add the CLI interface
cp ./walkthrough/01-cli.ts src/cli.ts
show file
```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
/**
 * Command-line entry point: joins the CLI arguments into one user message,
 * runs the agent loop once on a fresh thread, and prints whatever it returns.
 */
export async function cli() {
    // argv[0] is node and argv[1] is the script; everything after is the message
    const [, , ...words] = process.argv;
    if (words.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Seed a thread with the joined arguments as the initial user_input event
    const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

    const outcome = await agentLoop(thread);
    console.log(outcome);
}
```
Update index.ts to use the CLI
```diff
src/index.ts
+import { cli } from "./cli"
+
async function hello(): Promise<void> {
console.log('hello, world!')
async function main() {
- await hello()
+ await cli()
}
```
skip this step
cp ./walkthrough/01-index.ts src/index.ts
Add the agent implementation
cp ./walkthrough/01-agent.ts src/agent.ts
show file
```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";
// Whatever DetermineNextStep resolves to: a tool call or a respond-to-human step
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
/** One entry in a thread: a type tag plus an arbitrary payload. */
export interface Event {
    type: string;
    data: any;
}

/** Ordered list of events that makes up the agent's conversation state. */
export class Thread {
    // parameter property: stores the given array on `this.events`
    constructor(public events: Event[]) {}

    /** Render the thread as the string handed to the LLM prompt. */
    serializeForLLM(): string {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const serialized = JSON.stringify(this.events);
        return serialized;
    }
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
//
// Serializes the thread, asks DetermineNextStep for the next step, and
// returns that step unchanged.
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    return nextStep;
}
```
The BAML code is configured to use OPENAI_API_KEY by default
As you're testing, you can change the model / provider to something else
as you please
client "openai/gpt-4o"
[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)
For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini)
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.
If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key.
export OPENAI_API_KEY=...
Try it out
npx tsx src/index.ts hello
you should see a familiar response from the model
{
intent: 'done_for_now',
message: 'Hello! How can I assist you today?'
}
## Chapter 2 - Add Calculator Tools
Let's add some calculator tools to our agent.
Let's start by adding a tool definition for the calculator
These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.
cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
show file
```rust
// ./walkthrough/02-tool_calculator.baml
// Union of all calculator tool calls the model may choose as a next step.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
// Each tool is a structured output: a fixed `intent` tag that identifies the
// operation, plus two numeric operands `a` and `b`.
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}
```
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps
```diff
baml_src/agent.baml
function DetermineNextStep(
thread: string
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
client "openai/gpt-4o"
```
skip this step
cp ./walkthrough/02-agent.baml baml_src/agent.baml
Generate updated BAML client
npx baml-cli generate
Try out the calculator
npx tsx src/index.ts 'can you add 3 and 4'
You should see a tool call to the calculator
{
intent: 'add',
a: 3,
b: 4
}
## Chapter 3 - Process Tool Calls in a Loop
Now let's add a real agentic loop that can run the tools and get a final answer from the LLM.
First, let's update the agent to handle the tool call
```diff
src/agent.ts
}
-// right now this just runs one turn with the LLM, but
-// we'll update this function to handle all the agent logic
-export async function agentLoop(thread: Thread): Promise<AgentResponse> {
- const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
- return nextStep;
+
+
+export async function agentLoop(thread: Thread): Promise<string> {
+
+ while (true) {
+ const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
+ console.log("nextStep", nextStep);
+
+ switch (nextStep.intent) {
+ case "done_for_now":
+ // response to human, return the next step object
+ return nextStep.message;
+ case "add":
+ thread.events.push({
+ "type": "tool_call",
+ "data": nextStep
+ });
+ const result = nextStep.a + nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ continue;
+ default:
+ throw new Error(`Unknown intent: ${nextStep.intent}`);
+ }
+ }
}
```
skip this step
cp ./walkthrough/03-agent.ts src/agent.ts
Now, let's try it out
npx tsx src/index.ts 'can you add 3 and 4'
you should see the agent call the tool and then return the result
{
intent: 'done_for_now',
message: 'The sum of 3 and 4 is 7.'
}
For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output
export BAML_LOG=off
Try a multi-step calculation
npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'
you'll notice that tools like multiply and divide are not available
npx tsx src/index.ts 'can you multiply 3 and 4'
next, let's add handlers for the rest of the calculator tools
```diff
src/agent.ts
-import { b } from "../baml_client";
+import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";
-// tool call or a respond to human tool
-type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
-
export interface Event {
type: string
}
+export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;
+export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
+ let result: number;
+ switch (nextStep.intent) {
+ case "add":
+ result = nextStep.a + nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "subtract":
+ result = nextStep.a - nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "multiply":
+ result = nextStep.a * nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ case "divide":
+ result = nextStep.a / nextStep.b;
+ console.log("tool_response", result);
+ thread.events.push({
+ "type": "tool_response",
+ "data": result
+ });
+ return thread;
+ }
+}
export async function agentLoop(thread: Thread): Promise<string> {
console.log("nextStep", nextStep);
+ thread.events.push({
+ "type": "tool_call",
+ "data": nextStep
+ });
+
switch (nextStep.intent) {
case "done_for_now":
return nextStep.message;
case "add":
- thread.events.push({
- "type": "tool_call",
- "data": nextStep
- });
- const result = nextStep.a + nextStep.b;
- console.log("tool_response", result);
- thread.events.push({
- "type": "tool_response",
- "data": result
- });
- continue;
- default:
- throw new Error(`Unknown intent: ${nextStep.intent}`);
+ case "subtract":
+ case "multiply":
+ case "divide":
+ thread = await handleNextStep(nextStep, thread);
}
}
```
skip this step
cp ./walkthrough/03b-agent.ts src/agent.ts
Test subtraction
npx tsx src/index.ts 'can you subtract 3 from 4'
now, let's test the multiplication tool
npx tsx src/index.ts 'can you multiply 3 and 4'
finally, let's test a more complex calculation with multiple operations
npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
## Chapter 4 - Add Tests to agent.baml
Let's add some tests to our BAML agent.
to start, leave the baml logs enabled
export BAML_LOG=debug
next, let's add some tests to the agent
We'll start with a simple test that checks the agent's ability to handle
a basic calculation.
```diff
baml_src/agent.baml
"#
}
+
+test MathOperation {
+ functions [DetermineNextStep]
+ args {
+ thread #"
+ {
+ "type": "user_input",
+ "data": "can you multiply 3 and 4?"
+ }
+ "#
+ }
+}
+
```
skip this step
cp ./walkthrough/04-agent.baml baml_src/agent.baml
Run the tests
npx baml-cli test
now, let's improve the test with assertions!
Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.
```diff
baml_src/agent.baml
"#
}
+ @@assert(hello, {{this.intent == "done_for_now"}})
}
"#
}
+ @@assert(math_operation, {{this.intent == "multiply"}})
}
```
skip this step
cp ./walkthrough/04b-agent.baml baml_src/agent.baml
Run the tests
npx baml-cli test
as you add more tests, you can disable the logs to keep the output clean.
You may want to turn them on as you iterate on specific tests.
export BAML_LOG=off
now, let's add some more complex test cases,
where we resume from the middle of an in-progress
agentic context window
```diff
baml_src/agent.baml
"#
}
- @@assert(hello, {{this.intent == "done_for_now"}})
+ @@assert(intent, {{this.intent == "done_for_now"}})
}
"#
}
- @@assert(math_operation, {{this.intent == "multiply"}})
+ @@assert(intent, {{this.intent == "multiply"}})
}
+test LongMath {
+ functions [DetermineNextStep]
+ args {
+ thread #"
+ [
+ {
+ "type": "user_input",
+ "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
+ },
+ {
+ "type": "tool_call",
+ "data": {
+ "intent": "multiply",
+ "a": 3,
+ "b": 4
+ }
+ },
+ {
+ "type": "tool_response",
+ "data": 12
+ },
+ {
+ "type": "tool_call",
+ "data": {
+ "intent": "divide",
+ "a": 12,
+ "b": 2
+ }
+ },
+ {
+ "type": "tool_response",
+ "data": 6
+ },
+ {
+ "type": "tool_call",
+ "data": {
+ "intent": "add",
+ "a": 6,
+ "b": 12
+ }
+ },
+ {
+ "type": "tool_response",
+ "data": 18
+ }
+ ]
+ "#
+ }
+ @@assert(intent, {{this.intent == "done_for_now"}})
+ @@assert(answer, {{"18" in this.message}})
+}
+
```
skip this step
cp ./walkthrough/04c-agent.baml baml_src/agent.baml
let's try to run it
npx baml-cli test
## Chapter 5 - Multiple Human Tools
In this section, we'll add support for multiple tools that serve to
contact humans.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
first, let's add a tool that can request clarification from a human
this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.
```diff
baml_src/agent.baml
+// human tools are async requests to a human
+type HumanTools = ClarificationRequest | DoneForNow
+
+class ClarificationRequest {
+ intent "request_more_information" @description("you can request more information from me")
+ message string
+}
+
class DoneForNow {
intent "done_for_now"
- message string
+
+ message string @description(#"
+ message to send to the user about the work that was done.
+ "#)
}
function DetermineNextStep(
thread: string
-) -> CalculatorTools | DoneForNow {
+) -> HumanTools | CalculatorTools {
client "openai/gpt-4o"
}
+
```
skip this step
cp ./walkthrough/05-agent.baml baml_src/agent.baml
next, let's re-generate the client code
NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.
npx baml-cli generate
now, let's update the agent to use the new tool
```diff
src/agent.ts
}
-export async function agentLoop(thread: Thread): Promise<string> {
+export async function agentLoop(thread: Thread): Promise<Thread> {
while (true) {
switch (nextStep.intent) {
case "done_for_now":
- // response to human, return the next step object
- return nextStep.message;
+ case "request_more_information":
+ // response to human, return the thread
+ return thread;
case "add":
case "subtract":
```
skip this step
cp ./walkthrough/05-agent.ts src/agent.ts
next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI
```diff
src/cli.ts
// cli.ts lets you invoke the agent loop from the command line
-import { agentLoop, Thread, Event } from "./agent";
+import { agentLoop, Thread, Event } from "../src/agent";
+
+
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
// Run the agent loop with the thread
const result = await agentLoop(thread);
- console.log(result);
+ let lastEvent = result.events.slice(-1)[0];
+
+ while (lastEvent.data.intent === "request_more_information") {
+ const message = await askHuman(lastEvent.data.message);
+ thread.events.push({ type: "human_response", data: message });
+ const result = await agentLoop(thread);
+ lastEvent = result.events.slice(-1)[0];
+ }
+
+ // print the final result
+ // optional - you could loop here too
+ console.log(lastEvent.data.message);
+ process.exit(0);
}
+
+async function askHuman(message: string) {
+ const readline = require('readline').createInterface({
+ input: process.stdin,
+ output: process.stdout
+ });
+
+ return new Promise((resolve) => {
+ readline.question(`${message}\n> `, (answer: string) => {
+ resolve(answer);
+ });
+ });
+}
```
skip this step
cp ./walkthrough/05-cli.ts src/cli.ts
let's try it out
npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '
next, let's add a test that checks the agent's ability to handle
a clarification request
```diff
baml_src/agent.baml
+
+test MathOperationWithClarification {
+ functions [DetermineNextStep]
+ args {
+ thread #"
+ [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+ "#
+ }
+ @@assert(intent, {{this.intent == "request_more_information"}})
+}
+
+test MathOperationPostClarification {
+ functions [DetermineNextStep]
+ args {
+ thread #"
+ [
+ {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
+ {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
+ {"type":"human_response","data":"lets try 12 instead"},
+ ]
+ "#
+ }
+ @@assert(intent, {{this.intent == "multiply"}})
+ @@assert(a, {{this.b == 12}})
+ @@assert(b, {{this.a == 3}})
+}
+
+
+
```
skip this step
cp ./walkthrough/05b-agent.baml baml_src/agent.baml
and now we can run the tests again
npx baml-cli test
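you'll notice the new tests pass, but the hello world test fails.
This is because that test still asserts "done_for_now", but now that the clarification tool is available
the agent answers a bare greeting with "request_more_information" - let's update the assertion to match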
```diff
baml_src/agent.baml
"#
}
- @@assert(intent, {{this.intent == "done_for_now"}})
+ @@assert(intent, {{this.intent == "request_more_information"}})
}
```
skip this step
cp ./walkthrough/05c-agent.baml baml_src/agent.baml
Verify tests pass
npx baml-cli test
## Chapter 6 - Customize Your Prompt with Reasoning
In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.
this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)
there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)
for this section, it will be helpful to leave the baml logs enabled
export BAML_LOG=debug
update the agent prompt to include a reasoning step
```diff
baml_src/agent.baml
{{ ctx.output_format }}
+
+ First, always plan out what to do next, for example:
+
+ - ...
+ - ...
+ - ...
+
+ {...} // schema
"#
}
@@assert(b, {{this.a == 3}})
}
-
-
```
skip this step
cp ./walkthrough/06-agent.baml baml_src/agent.baml
generate the updated client
npx baml-cli generate
now, you can try it out with a simple prompt
npx tsx src/index.ts 'can you multiply 3 and 4'
you should see output from the baml logs showing the reasoning steps
#### optional challenge
add a field to your tool output format that includes the reasoning steps in the output!
## Chapter 7 - Customize Your Context Window
In this section, we'll explore how to customize the context window
of the agent.
this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)
update the agent to pretty-print the Context window for the model
```diff
src/agent.ts
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
- return JSON.stringify(this.events);
+ return JSON.stringify(this.events, null, 2);
}
}
```
skip this step
cp ./walkthrough/07-agent.ts src/agent.ts
Test the formatting
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
next, let's update the agent to use XML formatting instead
this is a very popular format for passing data to a model,
among other things, because of the token efficiency of XML.
```diff
src/agent.ts
serializeForLLM() {
- // can change this to whatever custom serialization you want to do, XML, etc
- // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
- return JSON.stringify(this.events, null, 2);
+ return this.events.map(e => this.serializeOneEvent(e)).join("\n");
}
+
+ trimLeadingWhitespace(s: string) {
+ return s.replace(/^[ \t]+/gm, '');
+ }
+
+ serializeOneEvent(e: Event) {
+ return this.trimLeadingWhitespace(`
+ <${e.data?.intent || e.type}>
+ ${
+ typeof e.data !== 'object' ? e.data :
+ Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")}
+ </${e.data?.intent || e.type}>
+ `)
+ }
}
```
skip this step
cp ./walkthrough/07b-agent.ts src/agent.ts
let's try it out
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
let's update our tests to match the new output format
```diff
baml_src/agent.baml
{{ ctx.output_format }}
- First, always plan out what to do next, for example:
+ Always think about what to do next first, like:
- ...
args {
thread #"
- {
- "type": "user_input",
- "data": "hello!"
- }
+ <user_input>
+ hello!
+ </user_input>
"#
}
args {
thread #"
- {
- "type": "user_input",
- "data": "can you multiply 3 and 4?"
- }
+ <user_input>
+ can you multiply 3 and 4?
+ </user_input>
"#
}
args {
thread #"
- [
- {
- "type": "user_input",
- "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
- },
- {
- "type": "tool_call",
- "data": {
- "intent": "multiply",
- "a": 3,
- "b": 4
- }
- },
- {
- "type": "tool_response",
- "data": 12
- },
- {
- "type": "tool_call",
- "data": {
- "intent": "divide",
- "a": 12,
- "b": 2
- }
- },
- {
- "type": "tool_response",
- "data": 6
- },
- {
- "type": "tool_call",
- "data": {
- "intent": "add",
- "a": 6,
- "b": 12
- }
- },
- {
- "type": "tool_response",
- "data": 18
- }
- ]
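+ <user_input>
+ can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
+ </user_input>
+
+
+ <multiply>
+ a: 3
+ b: 4
+ </multiply>
+
+
+ <tool_response>
+ 12
+ </tool_response>
+
+
+ <divide>
+ a: 12
+ b: 2
+ </divide>
+
+
+ <tool_response>
+ 6
+ </tool_response>
+
+
+ <add>
+ a: 6
+ b: 12
+ </add>
+
+
+ <tool_response>
+ 18
+ </tool_response>
+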
"#
}
args {
thread #"
- [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+ <user_input>
+ can you multiply 3 and fe1iiaff10
+ </user_input>
"#
}
args {
thread #"
- [
- {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
- {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
- {"type":"human_response","data":"lets try 12 instead"},
- ]
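+ <user_input>
+ can you multiply 3 and FD*(#F&& ?
+ </user_input>
+
+ <request_more_information>
+ message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
+ </request_more_information>
+
+ <human_response>
+ lets try 12 instead
+ </human_response>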
"#
}
@@assert(intent, {{this.intent == "multiply"}})
}
```
skip this step
cp ./walkthrough/07c-agent.baml baml_src/agent.baml
check out the updated tests
npx baml-cli test
## Chapter 8 - Adding API Endpoints
Add an Express server to expose the agent via HTTP.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Install Express and types
npm install express && npm install --save-dev @types/express supertest
Add the server implementation
cp ./walkthrough/08-server.ts src/server.ts
show file
```ts
// ./walkthrough/08-server.ts
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
// Express application that exposes the agent loop over HTTP.
const app = express();
// parse JSON request bodies so handlers can read req.body
app.use(express.json());
// pretty-print JSON responses with two-space indentation
app.set('json spaces', 2);
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
// seed a new thread with the request's `message` field as a user_input event
const thread = new Thread([{
type: "user_input",
data: req.body.message
}]);
// run the agent loop and send back whatever it returns as JSON
const result = await agentLoop(thread);
res.json(result);
});
// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
// optional - add state
// placeholder: always answers 404 until thread state is stored somewhere
res.status(404).json({ error: "Not implemented yet" });
});
// listen on PORT from the environment, falling back to 3000
const port = process.env.PORT || 3000;
app.listen(port, () => {
console.log(`Server running on port ${port}`);
});
// export the app instance for use by other modules
export { app };
```
Start the server
npx tsx src/server.ts
Test with curl (in another terminal)
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you add 3 and 4"}'
You should get an answer from the agent which includes the
agentic trace, ending in a message like:
{"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}
## Chapter 9 - In-Memory State and Async Clarification
Add state management and async clarification support.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Add some simple in-memory state management for threads
cp ./walkthrough/09-state.ts src/state.ts
show file
```ts
// ./walkthrough/09-state.ts
import crypto from 'crypto';
import { Thread } from '../src/agent';
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * Minimal in-memory store that maps generated thread ids to Thread objects.
 *
 * Threads are kept in a Map held by this instance, so nothing is persisted
 * beyond the lifetime of the object.
 */
export class ThreadStore {
  // thread id -> thread; Map needs explicit type arguments to type-check
  private threads: Map<string, Thread> = new Map();

  /**
   * Stores `thread` under a freshly generated random UUID.
   * @param thread the thread to store
   * @returns the id the thread was stored under
   */
  create(thread: Thread): string {
    const id = crypto.randomUUID();
    this.threads.set(id, thread);
    return id;
  }

  /**
   * Looks up a thread by id.
   * @param id an id previously returned by `create`
   * @returns the stored thread, or `undefined` when the id is unknown
   */
  get(id: string): Thread | undefined {
    return this.threads.get(id);
  }

  /**
   * Replaces the thread stored under `id`; an unknown id is simply inserted.
   * @param id the id to store the thread under
   * @param thread the new thread value
   */
  update(id: string, thread: Thread): void {
    this.threads.set(id, thread);
  }
}
```
update the server to use the state management
* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id
* implement POST /thread/:id/response
```diff
src/server.ts
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
+import { ThreadStore } from '../src/state';
const app = express();
app.set('json spaces', 2);
+const store = new ThreadStore();
+
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
data: req.body.message
}]);
- const result = await agentLoop(thread);
- res.json(result);
+
+ const threadId = store.create(thread);
+ const newThread = await agentLoop(thread);
+
+ store.update(threadId, newThread);
+
+ const lastEvent = newThread.events[newThread.events.length - 1];
+ // If we exited the loop, include the response URL so the client can
+ // push a new message onto the thread
+ lastEvent.data.response_url = `/thread/${threadId}/response`;
+
+ console.log("returning last event from endpoint", lastEvent);
+
+ res.json({
+ thread_id: threadId,
+ ...newThread
+ });
});
app.get('/thread/:id', (req, res) => {
- // optional - add state
- res.status(404).json({ error: "Not implemented yet" });
+ const thread = store.get(req.params.id);
+ if (!thread) {
+ return res.status(404).json({ error: "Thread not found" });
+ }
+ res.json(thread);
});
+// POST /thread/:id/response - Handle clarification response
+app.post('/thread/:id/response', async (req, res) => {
+ let thread = store.get(req.params.id);
+ if (!thread) {
+ return res.status(404).json({ error: "Thread not found" });
+ }
+
+ thread.events.push({
+ type: "human_response",
+ data: req.body.message
+ });
+
+ // loop until stop event
+ const newThread = await agentLoop(thread);
+
+ store.update(req.params.id, newThread);
+
+ const lastEvent = newThread.events[newThread.events.length - 1];
+ lastEvent.data.response_url = `/thread/${req.params.id}/response`;
+
+ console.log("returning last event from endpoint", lastEvent);
+
+ res.json(newThread);
+});
+
const port = process.env.PORT || 3000;
app.listen(port, () => {
```
skip this step
cp ./walkthrough/09-server.ts src/server.ts
Start the server
npx tsx src/server.ts
Test clarification flow
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you multiply 3 and xyz"}'
## Chapter 10 - Adding Human Approval
Add support for human approval of operations.
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
update the server to handle human approvals
* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong
```diff
src/server.ts
import express from 'express';
-import { Thread, agentLoop } from '../src/agent';
+import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
});
+
+type ApprovalPayload = {
+ type: "approval";
+ approved: boolean;
+ comment?: string;
+}
+
+type ResponsePayload = {
+ type: "response";
+ response: string;
+}
+
+type Payload = ApprovalPayload | ResponsePayload;
+
// POST /thread/:id/response - Handle clarification response
app.post('/thread/:id/response', async (req, res) => {
return res.status(404).json({ error: "Thread not found" });
}
+
+ const body: Payload = req.body;
+
+ let lastEvent = thread.events[thread.events.length - 1];
+
+ if (thread.awaitingHumanResponse() && body.type === 'response') {
+ thread.events.push({
+ type: "human_response",
+ data: body.response
+ });
+ } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
+ // push feedback onto the thread
+ thread.events.push({
+ type: "tool_response",
+ data: `user denied the operation with feedback: "${body.comment}"`
+ });
+ } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
+ // approved, run the tool, pushing results onto the thread
+ await handleNextStep(lastEvent.data, thread);
+ } else {
+ res.status(400).json({
+ error: "Invalid request: " + body.type,
+ awaitingHumanResponse: thread.awaitingHumanResponse(),
+ awaitingHumanApproval: thread.awaitingHumanApproval()
+ });
+ return;
+ }
+
- thread.events.push({
- type: "human_response",
- data: req.body.message
- });
-
// loop until stop event
const newThread = await agentLoop(thread);
store.update(req.params.id, newThread);
- const lastEvent = newThread.events[newThread.events.length - 1];
+ lastEvent = newThread.events[newThread.events.length - 1];
lastEvent.data.response_url = `/thread/${req.params.id}/response`;
```
skip this step
cp ./walkthrough/10-server.ts src/server.ts
Add a few methods to the agent to handle approvals and responses
```diff
src/agent.ts
`)
}
+
+ awaitingHumanResponse(): boolean {
+ const lastEvent = this.events[this.events.length - 1];
+ return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent);
+ }
+
+ awaitingHumanApproval(): boolean {
+ const lastEvent = this.events[this.events.length - 1];
+ return lastEvent.data.intent === 'divide';
+ }
}
// response to human, return the thread
return thread;
+ case "divide":
+ // divide is scary, return it for human approval
+ return thread;
case "add":
case "subtract":
case "multiply":
- case "divide":
thread = await handleNextStep(nextStep, thread);
}
```
skip this step
cp ./walkthrough/10-agent.ts src/agent.ts
Start the server
npx tsx src/server.ts
Test division with approval
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you divide 3 by 4"}'
You should see:
{
"thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
}
]
}
reject the request with another curl call, changing the thread ID
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'
You should see: the last tool call is now `"intent":"divide","a":3,"b":5`
{
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
},
{
"type": "tool_response",
"data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 5,
"response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
}
}
]
}
now you can approve the operation
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": true}'
you should see the final message includes the tool response and final result!
...
{
"type": "tool_response",
"data": 0.5
},
{
"type": "done_for_now",
"message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!",
"response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
}
## Chapter 11 - Human Approvals over email
in this section, we'll add support for human approvals over email.
This will start a little bit contrived, just to get the concepts down -
We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI
While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details.
export BAML_LOG=off
Install HumanLayer
npm install humanlayer
Update CLI to send `divide` and `request_more_information` to a human via email
```diff
src/cli.ts
// cli.ts lets you invoke the agent loop from the command line
+import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";
-
-
export async function cli() {
// Get command line arguments, skipping the first two (node and script name)
// Run the agent loop with the thread
- const result = await agentLoop(thread);
- let lastEvent = result.events.slice(-1)[0];
+ let newThread = await agentLoop(thread);
+ let lastEvent = newThread.events.slice(-1)[0];
- while (lastEvent.data.intent === "request_more_information") {
- const message = await askHuman(lastEvent.data.message);
- thread.events.push({ type: "human_response", data: message });
- const result = await agentLoop(thread);
- lastEvent = result.events.slice(-1)[0];
+ while (lastEvent.data.intent !== "done_for_now") {
+ const responseEvent = await askHuman(lastEvent);
+ thread.events.push(responseEvent);
+ newThread = await agentLoop(thread);
+ lastEvent = newThread.events.slice(-1)[0];
}
// print the final result
console.log(lastEvent.data.message);
process.exit(0);
}
-async function askHuman(message: string) {
+async function askHuman(lastEvent: Event): Promise<Event> {
+ if (process.env.HUMANLAYER_API_KEY) {
+ return await askHumanEmail(lastEvent);
+ } else {
+ return await askHumanCLI(lastEvent.data.message);
+ }
+}
+
+async function askHumanCLI(message: string): Promise<Event> {
const readline = require('readline').createInterface({
input: process.stdin,
return new Promise((resolve) => {
readline.question(`${message}\n> `, (answer: string) => {
- resolve(answer);
+ resolve({ type: "human_response", data: answer });
});
});
}
+
+export async function askHumanEmail(lastEvent: Event): Promise<Event> {
+ if (!process.env.HUMANLAYER_EMAIL) {
+ throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+ }
+ const hl = humanlayer({ //reads apiKey from env
+ // name of this agent
+ runId: "12fa-cli-agent",
+ verbose: true,
+ contactChannel: {
+ // agent should request permission via email
+ email: {
+ address: process.env.HUMANLAYER_EMAIL,
+ }
+ }
+ })
+
+ if (lastEvent.data.intent === "divide") {
+ // fetch approval synchronously - this will block until reply
+ const response = await hl.fetchHumanApproval({
+ spec: {
+ fn: "divide",
+ kwargs: {
+ a: lastEvent.data.a,
+ b: lastEvent.data.b
+ }
+ }
+ })
+
+ if (response.approved) {
+ const result = lastEvent.data.a / lastEvent.data.b;
+ console.log("tool_response", result);
+ return {
+ "type": "tool_response",
+ "data": result
+ };
+ } else {
+ return {
+ "type": "tool_response",
+ "data": `user denied operation ${lastEvent.data.intent}
+ with feedback: ${response.comment}`
+ };
+ }
+ }
+ throw new Error(`unknown tool: ${lastEvent.data.intent}`)
+}
```
skip this step
cp ./walkthrough/11-cli.ts src/cli.ts
Run the CLI
npx tsx src/index.ts 'can you divide 4 by 5'
The last line of your program should mention human review step
nextStep { intent: 'divide', a: 4, b: 5 }
HumanLayer: Requested human approval from HumanLayer cloud
go ahead and respond to the email with some feedback:

you should get another email with an updated attempt based on your feedback!
You can go ahead and approve this one:

and your final output will look like
nextStep {
intent: 'done_for_now',
message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
}
The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!
let's implement the `request_more_information` flow as well
```diff
src/cli.ts
})
+ if (lastEvent.data.intent === "request_more_information") {
+ // fetch response synchronously - this will block until reply
+ const response = await hl.fetchHumanResponse({
+ spec: {
+ msg: lastEvent.data.message
+ }
+ })
+ return {
+ "type": "tool_response",
+ "data": response
+ }
+ }
+
if (lastEvent.data.intent === "divide") {
// fetch approval synchronously - this will block until reply
```
skip this step
cp ./walkthrough/11b-cli.ts src/cli.ts
let's test the request_more_information flow by asking for a calculation
with garbled input:
npx tsx src/index.ts 'can you multiply 4 and xyz'
You should get an email with a request for clarification
Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?
you can respond with something like
use 8 instead of xyz
you should see a final result on the CLI like
I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.
as a final step, let's explore using a custom html template for the email
```diff
src/cli.ts
email: {
address: process.env.HUMANLAYER_EMAIL,
+ // custom email body - jinja
+ template: `{% if type == 'request_more_information' %}
+{{ event.spec.msg }}
+{% else %}
+agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
+with args: {{event.spec.kwargs}}
+
+reply to this email to approve
+{% endif %}`
}
}
```
skip this step
cp ./walkthrough/11c-cli.ts src/cli.ts
first try with divide:
npx tsx src/index.ts 'can you divide 4 by 5'
you should see a slightly different email with the custom template

feel free to run with the flow and then you can try updating the template to your liking
(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)
try triggering "request_more_information" as well!
that's it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval
## Chapter XX - HumanLayer Webhook Integration
the previous sections used the humanlayer SDK in "synchronous mode" - that
means every time we wait for human approval, we sit in a loop
polling until the human response is received.
That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results.
add code to initialize humanlayer in the server
```diff
src/server.ts
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
+import { humanlayer } from 'humanlayer';
const app = express();
const store = new ThreadStore();
+const getHumanlayer = () => {
+ const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
+ if (!HUMANLAYER_EMAIL) {
+ throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
+ }
+
+ const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
+ if (!HUMANLAYER_API_KEY) {
+ throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
+ }
+ return humanlayer({
+ runId: `12fa-agent`,
+ contactChannel: {
+ email: { address: HUMANLAYER_EMAIL }
+ }
+ });
+}
+
// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
// loop until stop event
- const newThread = await agentLoop(thread);
+ const result = await agentLoop(thread);
- store.update(req.params.id, newThread);
+ store.update(req.params.id, result);
- lastEvent = newThread.events[newThread.events.length - 1];
+ lastEvent = result.events[result.events.length - 1];
lastEvent.data.response_url = `/thread/${req.params.id}/response`;
console.log("returning last event from endpoint", lastEvent);
- res.json(newThread);
+ res.json(result);
});
```
skip this step
cp ./walkthrough/12-1-server-init.ts src/server.ts
next, let's update the /thread endpoint to
1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls
Update the server to be able to handle request_more_information responses
- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response
```diff
src/server.ts
-import express from 'express';
+import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
-import { humanlayer } from 'humanlayer';
+import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';
const app = express();
});
}
-
// POST /thread - Start new thread
-app.post('/thread', async (req, res) => {
+app.post('/thread', async (req: Request, res: Response) => {
const thread = new Thread([{
type: "user_input",
}]);
- const threadId = store.create(thread);
- const newThread = await agentLoop(thread);
-
- store.update(threadId, newThread);
+ // run agent loop asynchronously, return immediately
+ Promise.resolve().then(async () => {
+ const threadId = store.create(thread);
+ const newThread = await agentLoop(thread);
+
+ store.update(threadId, newThread);
- const lastEvent = newThread.events[newThread.events.length - 1];
- // If we exited the loop, include the response URL so the client can
- // push a new message onto the thread
- lastEvent.data.response_url = `/thread/${threadId}/response`;
+ const lastEvent = newThread.events[newThread.events.length - 1];
- console.log("returning last event from endpoint", lastEvent);
-
- res.json({
- thread_id: threadId,
- ...newThread
+ if (thread.awaitingHumanResponse()) {
+ const hl = getHumanlayer();
+ // create a human contact - returns immediately
+ hl.createHumanContact({
+ spec: {
+ msg: lastEvent.data.message,
+ state: {
+ thread_id: threadId,
+ }
+ }
+ });
+ }
});
+
+ res.json({ status: "processing" });
});
// GET /thread/:id - Get thread status
-app.get('/thread/:id', (req, res) => {
+app.get('/thread/:id', (req: Request, res: Response) => {
const thread = store.get(req.params.id);
if (!thread) {
});
+type WebhookResponse = V1Beta2HumanContactCompleted;
-type ApprovalPayload = {
- type: "approval";
- approved: boolean;
- comment?: string;
-}
+const handleHumanResponse = async (req: Request, res: Response) => {
-type ResponsePayload = {
- type: "response";
- response: string;
}
-type Payload = ApprovalPayload | ResponsePayload;
+app.post('/webhook', async (req: Request, res: Response) => {
+ console.log("webhook response", req.body);
+ const response = req.body as WebhookResponse;
-// POST /thread/:id/response - Handle clarification response
-app.post('/thread/:id/response', async (req, res) => {
- let thread = store.get(req.params.id);
+ // response is guaranteed to be set on a webhook
+ const humanResponse: string = response.event.status?.response as string;
+
+ const threadId = response.event.spec.state?.thread_id;
+ if (!threadId) {
+ return res.status(400).json({ error: "Thread ID not found" });
+ }
+
+ const thread = store.get(threadId);
if (!thread) {
return res.status(404).json({ error: "Thread not found" });
}
- const body: Payload = req.body;
-
- let lastEvent = thread.events[thread.events.length - 1];
-
- if (thread.awaitingHumanResponse() && body.type === 'response') {
- thread.events.push({
- type: "human_response",
- data: body.response
- });
- } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
- // push feedback onto the thread
- thread.events.push({
- type: "tool_response",
- data: `user denied the operation with feedback: "${body.comment}"`
- });
- } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
- // approved, run the tool, pushing results onto the thread
- await handleNextStep(lastEvent.data, thread);
- } else {
- res.status(400).json({
- error: "Invalid request: " + body.type,
- awaitingHumanResponse: thread.awaitingHumanResponse(),
- awaitingHumanApproval: thread.awaitingHumanApproval()
- });
- return;
+ if (!thread.awaitingHumanResponse()) {
+ return res.status(400).json({ error: "Thread is not awaiting human response" });
}
-
- // loop until stop event
- const result = await agentLoop(thread);
-
- store.update(req.params.id, result);
-
- lastEvent = result.events[result.events.length - 1];
- lastEvent.data.response_url = `/thread/${req.params.id}/response`;
-
- console.log("returning last event from endpoint", lastEvent);
-
- res.json(result);
});
```
skip this step
cp ./walkthrough/12a-server.ts src/server.ts
Start the server in another terminal
npx tsx src/server.ts
now that the server is running, send a payload to the '/thread' endpoint
__ do the response step
__ now handle approvals for divide
__ now also handle done_for_now
================================================
FILE: workshops/2025-05/walkthrough.yaml
================================================
title: "Building the 12-factor agent template from scratch"
text: "Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology."
targets:
- markdown: "./build/walkthrough.md"
onChange:
diff: true
cp: true
newFiles:
cat: false
cp: true
- folders:
path: "./build/sections"
skip:
- "cleanup"
final:
dirName: "final"
sections:
- name: cleanup
title: "Cleanup"
text: "Make sure you're starting from a clean slate"
steps:
- text: "Clean up existing files"
command: |
rm -rf baml_src/ && rm -rf src/
- name: hello-world
title: "Chapter 0 - Hello World"
text: "Let's start with a basic TypeScript setup and a hello world program."
steps:
- text: |
This guide is written in TypeScript (yes, a python version is coming soon)
There are many checkpoints between every file edit in the workshop steps,
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of nodejs and npm installed
You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine
command:
brew install node@20
results:
- text: "You should see the node version"
code: |
node --version
- text: "Copy initial package.json"
file: {src: ./walkthrough/00-package.json, dest: package.json}
- text: "Install dependencies"
command: |
npm install
incremental: true
- text: "Copy tsconfig.json"
file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json}
- text: "add .gitignore"
file: {src: ./walkthrough/00-.gitignore, dest: .gitignore}
- text: "Create src folder"
dir: {create: true, path: src}
- text: "Add a simple hello world index.ts"
file: {src: ./walkthrough/00-index.ts, dest: src/index.ts}
- text: "Run it to verify"
command: |
npx tsx src/index.ts
results:
- text: "You should see:"
code: |
hello, world!
- name: cli-and-agent
title: "Chapter 1 - CLI and Agent Loop"
text: "Now let's add BAML and create our first agent with a CLI interface."
steps:
- text: |
First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.
command: |
npm install @boundaryml/baml
incremental: true
- text: "Initialize BAML"
command: |
npx baml-cli init
incremental: true
- text: "Remove default resume.baml"
command: |
rm baml_src/resume.baml
incremental: true
- text: "Add our starter agent, a single baml prompt that we'll build on"
file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml}
- text: "Generate BAML client code"
command: |
npx baml-cli generate
incremental: true
- text: "Enable BAML logging for this section"
command: |
export BAML_LOG=debug
- text: "Add the CLI interface"
file: {src: ./walkthrough/01-cli.ts, dest: src/cli.ts}
- text: "Update index.ts to use the CLI"
file: {src: ./walkthrough/01-index.ts, dest: src/index.ts}
- text: "Add the agent implementation"
file: {src: ./walkthrough/01-agent.ts, dest: src/agent.ts}
- text: |
The BAML code is configured to use OPENAI_API_KEY by default
As you're testing, you can change the model / provider to something else
as you please
client "openai/gpt-4o"
[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)
For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini)
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.
If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key.
command: |
export OPENAI_API_KEY=...
- text: "Try it out"
command: |
npx tsx src/index.ts hello
results:
- text: you should see a familiar response from the model
code: |
{
intent: 'done_for_now',
message: 'Hello! How can I assist you today?'
}
- name: calculator-tools
title: "Chapter 2 - Add Calculator Tools"
text: "Let's add some calculator tools to our agent."
steps:
- text: |
Let's start by adding a tool definition for the calculator
These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.
file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml}
- text: |
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps
file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml}
- text: "Generate updated BAML client"
command: |
npx baml-cli generate
incremental: true
- text: "Try out the calculator"
command: |
npx tsx src/index.ts 'can you add 3 and 4'
results:
- text: "You should see a tool call to the calculator"
code: |
{
intent: 'add',
a: 3,
b: 4
}
- name: tool-loop
title: "Chapter 3 - Process Tool Calls in a Loop"
text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM."
steps:
- text: |
First, let's update the agent to handle the tool call
file: {src: ./walkthrough/03-agent.ts, dest: src/agent.ts}
- text: |
Now, let's try it out
command: |
npx tsx src/index.ts 'can you add 3 and 4'
results:
- text: you should see the agent call the tool and then return the result
code: |
{
intent: 'done_for_now',
message: 'The sum of 3 and 4 is 7.'
}
- text: "For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output"
command: |
export BAML_LOG=off
- text: "Try a multi-step calculation"
command: |
npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result'
- text: "you'll notice that tools like multiply and divide are not available"
command: |
npx tsx src/index.ts 'can you multiply 3 and 4'
- text: |
next, let's add handlers for the rest of the calculator tools
file: {src: ./walkthrough/03b-agent.ts, dest: src/agent.ts}
- text: "Test subtraction"
command: |
npx tsx src/index.ts 'can you subtract 3 from 4'
- text: |
now, let's test the multiplication tool
command: |
npx tsx src/index.ts 'can you multiply 3 and 4'
- text: |
finally, let's test a more complex calculation with multiple operations
command: |
npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
- name: baml-tests
title: "Chapter 4 - Add Tests to agent.baml"
text: "Let's add some tests to our BAML agent."
steps:
- text: to start, leave the baml logs enabled
command: |
export BAML_LOG=debug
- text: |
next, let's add some tests to the agent
We'll start with a simple test that checks the agent's ability to handle
a basic calculation.
file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml}
- text: "Run the tests"
command: |
npx baml-cli test
- text: |
now, let's improve the test with assertions!
Assertions are a great way to make sure the agent is working as expected,
and can easily be extended to check for more complex behavior.
file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml}
- text: "Run the tests"
command: |
npx baml-cli test
- text: |
as you add more tests, you can disable the logs to keep the output clean.
You may want to turn them on as you iterate on specific tests.
command: |
export BAML_LOG=off
- text: |
now, let's add some more complex test cases,
where we resume from in the middle of an in-progress
agentic context window
file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml}
- text: |
let's try to run it
command: |
npx baml-cli test
- name: human-tools
title: "Chapter 5 - Multiple Human Tools"
text: |
In this section, we'll add support for multiple tools that serve to
contact humans.
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: |
first, let's add a tool that can request clarification from a human
this will be different from the "done_for_now" tool,
and can be used to more flexibly handle different types of human interactions
in your agent.
file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml}
- text: |
next, let's re-generate the client code
NOTE - if you're using the VSCode extension for BAML,
the client will be regenerated automatically when you save the file
in your editor.
command: |
npx baml-cli generate
incremental: true
- text: |
now, let's update the agent to use the new tool
file: {src: ./walkthrough/05-agent.ts, dest: src/agent.ts}
- text: |
next, let's update the CLI to handle clarification requests
by requesting input from the user on the CLI
file: {src: ./walkthrough/05-cli.ts, dest: src/cli.ts}
- text: |
let's try it out
command: |
npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& '
- text: |
next, let's add a test that checks the agent's ability to handle
a clarification request
file: {src: ./walkthrough/05b-agent.baml, dest: baml_src/agent.baml}
- text: |
and now we can run the tests again
command: |
npx baml-cli test
- text: |
you'll notice the new test passes, but the hello world test fails
This is because the agent's default behavior is to return "done_for_now"
file: {src: ./walkthrough/05c-agent.baml, dest: baml_src/agent.baml}
- text: "Verify tests pass"
command: |
npx baml-cli test
- name: customize-prompt
title: "Chapter 6 - Customize Your Prompt with Reasoning"
text: |
In this section, we'll explore how to customize the prompt of the agent
with reasoning steps.
this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)
there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts)
steps:
- text: "for this section, it will be helpful to leave the baml logs enabled"
command: |
export BAML_LOG=debug
- text: |
update the agent prompt to include a reasoning step
file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml}
- text: generate the updated client
command: |
npx baml-cli generate
incremental: true
- text: |
now, you can try it out with a simple prompt
command: |
npx tsx src/index.ts 'can you multiply 3 and 4'
results:
- text: you should see output from the baml logs showing the reasoning steps
- text: |
#### optional challenge
add a field to your tool output format that includes the reasoning steps in the output!
- name: context-window
title: "Chapter 7 - Customize Your Context Window"
text: |
In this section, we'll explore how to customize the context window
of the agent.
this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)
steps:
- text: |
update the agent to pretty-print the Context window for the model
file: {src: ./walkthrough/07-agent.ts, dest: src/agent.ts}
- text: "Test the formatting"
command: |
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
- text: |
next, let's update the agent to use XML formatting instead
this is a very popular format for passing data to a model,
among other things, because of the token efficiency of XML.
file: {src: ./walkthrough/07b-agent.ts, dest: src/agent.ts}
- text: |
let's try it out
command: |
BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'
- text: |
let's update our tests to match the new output format
file: {src: ./walkthrough/07c-agent.baml, dest: baml_src/agent.baml}
- text: |
check out the updated tests
command: |
npx baml-cli test
- name: api-endpoints
title: "Chapter 8 - Adding API Endpoints"
text: "Add an Express server to expose the agent via HTTP."
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: "Install Express and types"
command: |
npm install express && npm install --save-dev @types/express supertest
incremental: true
- text: "Add the server implementation"
file: {src: ./walkthrough/08-server.ts, dest: src/server.ts}
- text: "Start the server"
command: |
npx tsx src/server.ts
- text: "Test with curl (in another terminal)"
command: |
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you add 3 and 4"}'
results:
- text: |
You should get an answer from the agent which includes the
agentic trace, ending in a message like:
code: |
{"intent":"done_for_now","message":"The sum of 3 and 4 is 7."}
- name: state-management
title: "Chapter 9 - In-Memory State and Async Clarification"
text: "Add state management and async clarification support."
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: "Add some simple in-memory state management for threads"
file: {src: ./walkthrough/09-state.ts, dest: src/state.ts}
- text: |
update the server to use the state management
* Add thread state management using `ThreadStore`
* return thread IDs and response URLs from the /thread endpoint
* implement GET /thread/:id
* implement POST /thread/:id/response
file: {src: ./walkthrough/09-server.ts, dest: src/server.ts}
- text: "Start the server"
command: |
npx tsx src/server.ts
- text: "Test clarification flow"
command: |
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you multiply 3 and xyz"}'
- name: human-approval
title: "Chapter 10 - Adding Human Approval"
text: "Add support for human approval of operations."
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: |
update the server to handle human approvals
* Import `handleNextStep` to execute approved actions
* Add two payload types to distinguish approvals from responses
* Handle responses and approvals differently in the endpoint
* Show better error messages when things go wrong
file: {src: ./walkthrough/10-server.ts, dest: src/server.ts}
- text: "Add a few methods to the agent to handle approvals and responses"
file: {src: ./walkthrough/10-agent.ts, dest: src/agent.ts}
- text: "Start the server"
command: |
npx tsx src/server.ts
- text: "Test division with approval"
command: |
curl -X POST http://localhost:3000/thread \
-H "Content-Type: application/json" \
-d '{"message":"can you divide 3 by 4"}'
results:
- text: "You should see:"
code: |
{
"thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b",
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
}
]
}
- text: "reject the request with another curl call, changing the thread ID"
command: |
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}'
results:
- text: 'You should see: the last tool call is now `"intent":"divide","a":3,"b":5`'
code: |
{
"events": [
{
"type": "user_input",
"data": "can you divide 3 by 4"
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 4,
"response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response"
}
},
{
"type": "tool_response",
"data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\""
},
{
"type": "tool_call",
"data": {
"intent": "divide",
"a": 3,
"b": 5,
"response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response"
}
}
]
}
- text: "now you can approve the operation"
command: |
curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \
-H "Content-Type: application/json" \
-d '{"type": "approval", "approved": true}'
results:
- text: "you should see the final message includes the tool response and final result!"
code: |
...
{
"type": "tool_response",
"data": 0.5
},
{
"type": "done_for_now",
"message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!",
"response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response"
}
- name: humanlayer-approval
title: "Chapter 11 - Human Approvals over email"
text: |
in this section, we'll add support for human approvals over email.
This will start a little bit contrived, just to get the concepts down -
We'll start by invoking the workflow from the CLI but approvals for `divide`
and `request_more_information` will be handled over email,
then the final `done_for_now` answer will be printed back to the CLI
While contrived, this is a great example of the flexibility you get from
[factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md)
steps:
- text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details."
command: |
export BAML_LOG=off
- text: "Install HumanLayer"
command: |
npm install humanlayer
incremental: true
- text: "Update CLI to send `divide` and `request_more_information` to a human via email"
file: {src: ./walkthrough/11-cli.ts, dest: src/cli.ts}
- text: "Run the CLI"
command: |
npx tsx src/index.ts 'can you divide 4 by 5'
results:
- text: "The last line of your program should mention human review step"
code: |
nextStep { intent: 'divide', a: 4, b: 5 }
HumanLayer: Requested human approval from HumanLayer cloud
- text: |
go ahead and respond to the email with some feedback:

- text: |
you should get another email with an updated attempt based on your feedback!
You can go ahead and approve this one:

results:
- text: and your final output will look like
code: |
nextStep {
intent: 'done_for_now',
message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!'
}
The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!
- text: |
let's implement the `request_more_information` flow as well
file: {src: ./walkthrough/11b-cli.ts, dest: src/cli.ts}
- text: |
let's test the `request_more_information` flow by asking for a calculation
with garbled input:
command: |
npx tsx src/index.ts 'can you multiply 4 and xyz'
- text: "You should get an email with a request for clarification"
command: |
Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else?
- text: you can respond with something like
command: |
use 8 instead of xyz
results:
- text: you should see a final result on the CLI like
code: |
I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.
- text: |
as a final step, let's explore using a custom html template for the email
file: {src: ./walkthrough/11c-cli.ts, dest: src/cli.ts}
- text: |
first try with divide:
command: |
npx tsx src/index.ts 'can you divide 4 by 5'
results:
- text: |
you should see a slightly different email with the custom template

feel free to run with the flow and then you can try updating the template to your liking
(if you're using cursor, something as simple as highlighting the template and asking to "make it better"
should do the trick)
try triggering "request_more_information" as well!
- text: |
that's it - in the next chapter, we'll build a fully email-driven
workflow agent that uses webhooks for human approval
- name: humanlayer-webhook
title: "Chapter XX - HumanLayer Webhook Integration"
text: |
the previous sections used the humanlayer SDK in "synchronous mode" - that
means every time we wait for human approval, we sit in a loop
polling until the human response is received.
That's obviously not ideal, especially for production workloads,
so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md)
by updating the server to end processing after contacting a human, and use webhooks to receive the results.
steps:
- text: |
add code to initialize humanlayer in the server
file: {src: ./walkthrough/12-1-server-init.ts, dest: src/server.ts}
- text: |
next, let's update the /thread endpoint to
1. handle requests asynchronously, returning immediately
2. create a human contact on request_more_information and done_for_now calls
# file: {src: }
- text: |
Update the server to be able to handle request_clarification responses
- remove the old /response endpoint and types
- update the /thread endpoint to run processing asynchronously, return immediately
- send a state.threadId when requesting human responses
- add a handleHumanResponse function to process the human response
- add a /webhook endpoint to handle the webhook response
file: {src: ./walkthrough/12a-server.ts, dest: src/server.ts}
- text: "Start the server in another terminal"
command: |
npx tsx src/server.ts
- text: |
now that the server is running, send a payload to the '/thread' endpoint
- text: __ do the response step
- text: __ now handle approvals for divide
- text: __ now also handle done_for_now
================================================
FILE: workshops/2025-05-17/.gitignore
================================================
baml_src/*.baml
src/*.ts
package.json
package-lock.json
tsconfig.json
build/
================================================
FILE: workshops/2025-05-17/sections/00-hello-world/README.md
================================================
# Chapter 0 - Hello World
Let's start with a basic TypeScript setup and a hello world program.
This guide is written in TypeScript (yes, a python version is coming soon)
There are many checkpoints between every file edit in the workshop steps,
so even if you aren't super familiar with typescript,
you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of nodejs and npm installed
You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine
brew install node@20
You should see the node version
node --version
Copy initial package.json
cp ./walkthrough/00-package.json package.json
show file
```json
// ./walkthrough/00-package.json
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
```
Install dependencies
npm install
Copy tsconfig.json
cp ./walkthrough/00-tsconfig.json tsconfig.json
show file
```json
// ./walkthrough/00-tsconfig.json
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
```
add .gitignore
cp ./walkthrough/00-.gitignore .gitignore
show file
```gitignore
// ./walkthrough/00-.gitignore
baml_client/
node_modules/
```
Create src folder
mkdir -p src
Add a simple hello world index.ts
cp ./walkthrough/00-index.ts src/index.ts
show file
```ts
// ./walkthrough/00-index.ts
// Print the hello-world greeting to stdout.
// Returns a promise that resolves with no value once the message is logged.
async function hello(): Promise<void> {
  console.log('hello, world!')
}
// Program entry point: start the greeting and wait for it to finish.
async function main() {
  const pending = hello()
  await pending
}

// Launch the program; a rejected promise is reported through console.error.
main().catch(console.error)
```
Run it to verify
npx tsx src/index.ts
You should see:
hello, world!
================================================
FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-index.ts
================================================
// Print the hello-world greeting to stdout.
// Returns a promise that resolves with no value once the message is logged.
async function hello(): Promise<void> {
  console.log('hello, world!')
}
// Program entry point: start the greeting and wait for it to finish.
async function main() {
  const pending = hello()
  await pending
}

// Launch the program; a rejected promise is reported through console.error.
main().catch(console.error)
================================================
FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-package.json
================================================
{
"name": "my-agent",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "tsx src/index.ts",
"build": "tsc"
},
"dependencies": {
"tsx": "^4.15.0",
"typescript": "^5.0.0"
},
"devDependencies": {
"@types/node": "^20.0.0",
"@typescript-eslint/eslint-plugin": "^6.0.0",
"@typescript-eslint/parser": "^6.0.0",
"eslint": "^8.0.0"
}
}
================================================
FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-tsconfig.json
================================================
{
"compilerOptions": {
"target": "ES2017",
"lib": ["esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "bundler",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"plugins": [],
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
"exclude": ["node_modules", "walkthrough"]
}
================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/README.md
================================================
# Chapter 1 - CLI and Agent Loop
Now let's add BAML and create our first agent with a CLI interface.
First, we'll need to install [BAML](https://github.com/boundaryml/baml)
which is a tool for prompting and structured outputs.
npm install @boundaryml/baml
Initialize BAML
npx baml-cli init
Remove default resume.baml
rm baml_src/resume.baml
Add our starter agent, a single baml prompt that we'll build on
cp ./walkthrough/01-agent.baml baml_src/agent.baml
show file
```rust
// ./walkthrough/01-agent.baml
// Structured output returned by DetermineNextStep.
// `intent` is the fixed literal "done_for_now"; `message` is a free-form string.
class DoneForNow {
intent "done_for_now"
message string
}
// LLM client definition using the "openai-generic" provider.
// The endpoint and credentials are read from the BASETEN_BASE_URL and
// BASETEN_API_KEY environment variables.
client Qwen3 {
provider "openai-generic"
options {
base_url env.BASETEN_BASE_URL
api_key env.BASETEN_API_KEY
}
}
// Ask the model what the next step for a thread should be.
//   thread: the thread, passed as a string and interpolated into the user message
//   returns: a DoneForNow object (the only output type declared here)
// The prompt has a system message and a user message; {{ ctx.output_format }}
// is placed at the end of the user message.
function DetermineNextStep(
thread: string
) -> DoneForNow {
client Qwen3
// use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Smoke test: runs DetermineNextStep on a thread holding a single
// user_input event whose data is "hello!". No assertions are declared here.
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
```
Generate BAML client code
npx baml-cli generate
Enable BAML logging for this section
export BAML_LOG=debug
Add the CLI interface
cp ./walkthrough/01-cli.ts src/cli.ts
show file
```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
// Command-line entry point: turns the process arguments into a single user
// message, runs the agent loop on it, and prints the result.
export async function cli() {
  // Drop the first two argv entries (node and the script name); the rest is the message
  const [, , ...words] = process.argv;

  if (words.length === 0) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // Seed a new thread with one user_input event holding the space-joined arguments
  const thread = new Thread([{ type: "user_input", data: words.join(" ") }]);

  // Run the agent loop on the thread and log whatever it returns
  console.log(await agentLoop(thread));
}
```
Update index.ts to use the CLI
```diff
src/index.ts
+import { cli } from "./cli"
+
async function hello(): Promise<void> {
console.log('hello, world!')
async function main() {
- await hello()
+ await cli()
}
```
skip this step
cp ./walkthrough/01-index.ts src/index.ts
Add the agent implementation
cp ./walkthrough/01-agent.ts src/agent.ts
show file
```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";
// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
export interface Event {
type: string
data: any;
}
export class Thread {
events: Event[] = [];
constructor(events: Event[]) {
this.events = events;
}
serializeForLLM() {
// can change this to whatever custom serialization you want to do, XML, etc
// e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
return JSON.stringify(this.events);
}
}
// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
return nextStep;
}
```
The BAML code is configured to use BASETEN_API_KEY by default.
To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co),
and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/).
```rust
function DetermineNextStep(thread: string) -> DoneForNow {
client Qwen3
// ...
```
If you want to run the example with no changes, set the BASETEN_API_KEY env var to any valid Baseten API key and BASETEN_BASE_URL to the URL of your deployed model.
If you want to try swapping out the model, you can change the `client` line.
[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms)
For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini)
or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider.
To use OpenAI with an OPENAI_API_KEY instead, you can do:
client "openai/gpt-4o"
Set your env vars
export BASETEN_API_KEY=...
export BASETEN_BASE_URL=...
Try it out
npx tsx src/index.ts hello
You should see a familiar response from the model:
{
intent: 'done_for_now',
message: 'Hello! How can I assist you today?'
}
================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/src/index.ts
================================================
/**
 * Prints the starter greeting to stdout.
 *
 * @returns a promise that resolves (with no value) once the greeting is logged
 */
async function hello(): Promise<void> {
  console.log('hello, world!')
}
// Program entry point: run the greeting and wait for it to finish.
async function main() {
  const greeting = hello()
  await greeting
}

// Kick off the program; any rejection is reported on stderr.
main().catch((err) => console.error(err))
================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-agent.baml
================================================
// Structured output returned by DetermineNextStep in this file.
class DoneForNow {
// Literal-valued field; always the string "done_for_now".
intent "done_for_now"
// Free-form text carried alongside the intent.
message string
}
// LLM client used by DetermineNextStep. It talks to an OpenAI-compatible
// endpoint whose URL and key are read from the environment.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    // Both values must be set in the environment before running the agent.
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY
  }
}
// Asks the Qwen3 client what the agent should do next.
//
// thread: the conversation so far, already serialized to a string by the caller.
// Returns a DoneForNow value.
function DetermineNextStep(
thread: string
) -> DoneForNow {
client Qwen3
// /nothink is sent for now because the thinking tokens (or the streaming of
// them) appear to interfere with BAML -- this is a suspicion, not confirmed.
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Smoke test: calls DetermineNextStep with a thread holding a single
// "user_input" event whose data is "hello!".
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-agent.ts
================================================
import { b } from "../baml_client";
// The resolved value of b.DetermineNextStep: a tool call or a
// respond-to-human tool. Derived from the generated client so it tracks
// the BAML function's return type automatically.
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;
/** One entry in a thread's list of events. */
export interface Event {
  /** Kind of event, as a free-form string. */
  type: string;
  /** Payload for the event; its shape is not constrained here. */
  data: any;
}
/** Holds the list of events for one conversation and serializes it for the LLM. */
export class Thread {
  /** Events recorded on this thread. */
  events: Event[] = [];

  /**
   * @param events the events this thread starts with
   */
  constructor(events: Event[]) {
    this.events = events;
  }

  /**
   * Renders the events as a JSON string.
   *
   * This can be swapped for any custom serialization (XML, etc.), e.g.
   * https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
   */
  serializeForLLM() {
    const { events } = this;
    return JSON.stringify(events);
  }
}
/**
 * Runs the agent against a thread.
 *
 * Right now this just runs one turn with the LLM, but this function will be
 * updated to handle all the agent logic.
 *
 * @param thread the conversation to act on; it is serialized via
 *               `thread.serializeForLLM()` before being sent to the model
 * @returns whatever `b.DetermineNextStep` resolves to
 */
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
  const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
  return nextStep;
}
================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { agentLoop, Thread, Event } from "./agent";
/**
 * Command-line entry point: turns the process arguments into a single user
 * message, runs the agent loop on it, and prints the result.
 *
 * Exits with status 1 when no message is supplied.
 */
export async function cli() {
  // Drop the first two argv entries (node and script name); keep the rest.
  const [, , ...words] = process.argv;

  if (words.length === 0) {
    console.error("Error: Please provide a message as a command line argument");
    process.exit(1);
  }

  // All remaining arguments together form one message.
  const message = words.join(" ");

  // Seed a new thread with the user's message as its first event.
  const thread = new Thread([{ type: "user_input", data: message }]);

  // Run the agent loop and print whatever it resolves to.
  console.log(await agentLoop(thread));
}
================================================
FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-index.ts
================================================
import { cli } from "./cli"
/**
 * Prints the starter greeting to stdout.
 *
 * @returns a promise that resolves (with no value) once the greeting is logged
 */
async function hello(): Promise<void> {
  console.log('hello, world!')
}
// Program entry point: hand control to the command-line interface.
async function main() {
  const run = cli()
  await run
}

// Kick off the program; any rejection is reported on stderr.
main().catch((err) => console.error(err))
================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/README.md
================================================
# Chapter 2 - Add Calculator Tools
Let's add some calculator tools to our agent.
Let's start by adding a tool definition for the calculator.
These are simple structured outputs that we'll ask the model to
return as a "next step" in the agentic loop.
cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
show file
```rust
// ./walkthrough/02-tool_calculator.baml
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool
class AddTool {
intent "add"
a int | float
b int | float
}
class SubtractTool {
intent "subtract"
a int | float
b int | float
}
class MultiplyTool {
intent "multiply"
a int | float
b int | float
}
class DivideTool {
intent "divide"
a int | float
b int | float
}
```
Now, let's update the agent's DetermineNextStep method to
expose the calculator tools as potential next steps
```diff
baml_src/agent.baml
function DetermineNextStep(
thread: string
-) -> DoneForNow {
+) -> CalculatorTools | DoneForNow {
client Qwen3
```
skip this step
cp ./walkthrough/02-agent.baml baml_src/agent.baml
Generate updated BAML client
npx baml-cli generate
Try out the calculator
npx tsx src/index.ts 'can you add 3 and 4'
You should see a tool call to the calculator
{
intent: 'add',
a: 3,
b: 4
}
================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/baml_src/agent.baml
================================================
// Structured output returned by DetermineNextStep in this file.
class DoneForNow {
// Literal-valued field; always the string "done_for_now".
intent "done_for_now"
// Free-form text carried alongside the intent.
message string
}
// LLM client used by DetermineNextStep. It talks to an OpenAI-compatible
// endpoint whose URL and key are read from the environment.
client<llm> Qwen3 {
  provider "openai-generic"
  options {
    // Both values must be set in the environment before running the agent.
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY
  }
}
// Asks the Qwen3 client what the agent should do next.
//
// thread: the conversation so far, already serialized to a string by the caller.
// Returns a DoneForNow value.
function DetermineNextStep(
thread: string
) -> DoneForNow {
client Qwen3
// /nothink is sent for now because the thinking tokens (or the streaming of
// them) appear to interfere with BAML -- this is a suspicion, not confirmed.
prompt #"
{{ _.role("system") }}
/nothink
You are a helpful assistant that can help with tasks.
{{ _.role("user") }}
You are working on the following thread:
{{ thread }}
What should the next step be?
{{ ctx.output_format }}
"#
}
// Smoke test: calls DetermineNextStep with a thread holding a single
// "user_input" event whose data is "hello!".
test HelloWorld {
functions [DetermineNextStep]
args {
thread #"
{
"type": "user_input",
"data": "hello!"
}
"#
}
}
================================================
FILE: workshops/2025-05-17/sections/02-calculator-tools/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview
// OpenAI client for the gpt-4o model; the key is read from OPENAI_API_KEY.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}
// OpenAI client for the gpt-4o-mini model; the key is read from OPENAI_API_KEY.
// Uses the retry policy named Exponential, which is declared outside this block.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}
client