Repository: humanlayer/12-factor-agents Branch: main Commit: d20c728368bf Files: 429 Total size: 1.0 MB Directory structure: gitextract_8e8ds6gd/ ├── .gitignore ├── CLAUDE.md ├── LICENSE ├── Makefile ├── README.md ├── content/ │ ├── appendix-13-pre-fetch.md │ ├── brief-history-of-software.md │ ├── factor-01-natural-language-to-tool-calls.md │ ├── factor-02-own-your-prompts.md │ ├── factor-03-own-your-context-window.md │ ├── factor-04-tools-are-structured-outputs.md │ ├── factor-05-unify-execution-state.md │ ├── factor-06-launch-pause-resume.md │ ├── factor-07-contact-humans-with-tools.md │ ├── factor-08-own-your-control-flow.md │ ├── factor-09-compact-errors.md │ ├── factor-1-natural-language-to-tool-calls.md │ ├── factor-10-small-focused-agents.md │ ├── factor-11-trigger-from-anywhere.md │ ├── factor-12-stateless-reducer.md │ ├── factor-2-own-your-prompts.md │ ├── factor-3-own-your-context-window.md │ ├── factor-4-tools-are-structured-outputs.md │ ├── factor-5-unify-execution-state.md │ ├── factor-6-launch-pause-resume.md │ ├── factor-7-contact-humans-with-tools.md │ ├── factor-8-own-your-control-flow.md │ └── factor-9-compact-errors.md ├── drafts/ │ ├── a2h-spec.md │ └── ah2-openapi.json ├── hack/ │ └── contributors_markdown/ │ ├── .python-version │ ├── README.md │ ├── contributors_markdown.py │ └── pyproject.toml ├── packages/ │ ├── create-12-factor-agent/ │ │ └── template/ │ │ ├── .gitignore │ │ ├── README.md │ │ ├── baml_src/ │ │ │ ├── agent.baml │ │ │ ├── clients.baml │ │ │ ├── generators.baml │ │ │ └── tool_calculator.baml │ │ ├── package.json │ │ ├── src/ │ │ │ ├── a2h.ts │ │ │ ├── agent.ts │ │ │ ├── cli.ts │ │ │ ├── index.ts │ │ │ ├── server.ts │ │ │ └── state.ts │ │ └── tsconfig.json │ └── walkthroughgen/ │ ├── .gitignore │ ├── examples/ │ │ ├── typescript/ │ │ │ ├── .gitignore │ │ │ ├── walkthrough/ │ │ │ │ ├── 00-package-lock.json │ │ │ │ ├── 00-package.json │ │ │ │ ├── 00-tsconfig.json │ │ │ │ ├── 01-index.ts │ │ │ │ ├── 02-cli.ts │ │ │ │ └── 
02-index.ts │ │ │ └── walkthrough.yaml │ │ └── walkthroughgen/ │ │ └── walkthrough.yaml │ ├── jest.config.js │ ├── package.json │ ├── prompt.md │ ├── readme.md │ ├── src/ │ │ ├── cli.ts │ │ └── index.ts │ ├── test/ │ │ ├── e2e/ │ │ │ └── test-e2e.ts │ │ └── utils/ │ │ ├── console-mock.ts │ │ └── temp-dir.ts │ └── tsconfig.json └── workshops/ ├── .gitignore ├── .python-version ├── 2025-05/ │ ├── .gitignore │ ├── Makefile │ ├── final/ │ │ ├── .gitignore │ │ ├── baml_src/ │ │ │ ├── agent.baml │ │ │ ├── clients.baml │ │ │ ├── generators.baml │ │ │ └── tool_calculator.baml │ │ ├── package.json │ │ ├── src/ │ │ │ ├── agent.ts │ │ │ ├── cli.ts │ │ │ ├── index.ts │ │ │ ├── server.ts │ │ │ └── state.ts │ │ └── tsconfig.json │ ├── sections/ │ │ ├── 00-hello-world/ │ │ │ ├── README.md │ │ │ └── walkthrough/ │ │ │ ├── 00-.gitignore │ │ │ ├── 00-index.ts │ │ │ ├── 00-package.json │ │ │ └── 00-tsconfig.json │ │ ├── 01-cli-and-agent/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ └── index.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 01-agent.baml │ │ │ ├── 01-agent.ts │ │ │ ├── 01-cli.ts │ │ │ └── 01-index.ts │ │ ├── 02-calculator-tools/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ └── generators.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ └── index.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 02-agent.baml │ │ │ └── 02-tool_calculator.baml │ │ ├── 03-tool-loop/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ └── index.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 03-agent.ts │ │ │ └── 03b-agent.ts │ │ ├── 04-baml-tests/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── 
baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ └── index.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 04-agent.baml │ │ │ ├── 04b-agent.baml │ │ │ └── 04c-agent.baml │ │ ├── 05-human-tools/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ └── index.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 05-agent.baml │ │ │ ├── 05-agent.ts │ │ │ ├── 05-cli.ts │ │ │ ├── 05b-agent.baml │ │ │ └── 05c-agent.baml │ │ ├── 06-customize-prompt/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ └── index.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ └── 06-agent.baml │ │ ├── 07-context-window/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ └── index.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 07-agent.ts │ │ │ ├── 07b-agent.ts │ │ │ └── 07c-agent.baml │ │ ├── 08-api-endpoints/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ └── index.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ └── 08-server.ts │ │ ├── 09-state-management/ │ │ │ ├── .gitignore │ │ │ 
├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ ├── index.ts │ │ │ │ └── server.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 09-server.ts │ │ │ └── 09-state.ts │ │ ├── 10-human-approval/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ ├── index.ts │ │ │ │ ├── server.ts │ │ │ │ └── state.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 10-agent.ts │ │ │ └── 10-server.ts │ │ ├── 11-humanlayer-approval/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ ├── index.ts │ │ │ │ ├── server.ts │ │ │ │ └── state.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 11-cli.ts │ │ │ ├── 11b-cli.ts │ │ │ └── 11c-cli.ts │ │ ├── 12-humanlayer-webhook/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ ├── generators.baml │ │ │ │ └── tool_calculator.baml │ │ │ ├── package.json │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ ├── index.ts │ │ │ │ ├── server.ts │ │ │ │ └── state.ts │ │ │ ├── tsconfig.json │ │ │ └── walkthrough/ │ │ │ ├── 12-1-server-init.ts │ │ │ └── 12a-server.ts │ │ └── final/ │ │ ├── .gitignore │ │ ├── README.md │ │ ├── baml_src/ │ │ │ ├── agent.baml │ │ │ ├── clients.baml │ │ │ ├── generators.baml │ │ │ └── tool_calculator.baml │ │ ├── package.json │ │ ├── src/ │ │ │ ├── agent.ts │ │ │ ├── cli.ts │ │ │ ├── index.ts │ │ │ ├── server.ts │ │ │ └── state.ts │ │ └── 
tsconfig.json │ ├── walkthrough/ │ │ ├── 00-.gitignore │ │ ├── 00-index.ts │ │ ├── 00-package.json │ │ ├── 00-tsconfig.json │ │ ├── 01-agent.baml │ │ ├── 01-agent.ts │ │ ├── 01-cli.ts │ │ ├── 01-index.ts │ │ ├── 02-agent.baml │ │ ├── 02-tool_calculator.baml │ │ ├── 03-agent.ts │ │ ├── 03b-agent.ts │ │ ├── 04-agent.baml │ │ ├── 04b-agent.baml │ │ ├── 04c-agent.baml │ │ ├── 05-agent.baml │ │ ├── 05-agent.ts │ │ ├── 05-cli.ts │ │ ├── 05b-agent.baml │ │ ├── 05c-agent.baml │ │ ├── 06-agent.baml │ │ ├── 07-agent.ts │ │ ├── 07b-agent.ts │ │ ├── 07c-agent.baml │ │ ├── 08-server.ts │ │ ├── 09-server.ts │ │ ├── 09-state.ts │ │ ├── 10-agent.ts │ │ ├── 10-server.ts │ │ ├── 11-cli.ts │ │ ├── 11b-cli.ts │ │ ├── 11c-cli.ts │ │ ├── 12-1-server-init.ts │ │ ├── 12-server.ts │ │ ├── 12a-server.ts │ │ ├── 12aa-server.ts │ │ └── 12b-server.ts │ ├── walkthrough.md │ └── walkthrough.yaml ├── 2025-05-17/ │ ├── .gitignore │ ├── sections/ │ │ ├── 00-hello-world/ │ │ │ ├── README.md │ │ │ └── walkthrough/ │ │ │ ├── 00-.gitignore │ │ │ ├── 00-index.ts │ │ │ ├── 00-package.json │ │ │ └── 00-tsconfig.json │ │ ├── 01-cli-and-agent/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── src/ │ │ │ │ └── index.ts │ │ │ └── walkthrough/ │ │ │ ├── 01-agent.baml │ │ │ ├── 01-agent.ts │ │ │ ├── 01-cli.ts │ │ │ └── 01-index.ts │ │ ├── 02-calculator-tools/ │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ ├── baml_src/ │ │ │ │ ├── agent.baml │ │ │ │ ├── clients.baml │ │ │ │ └── generators.baml │ │ │ ├── src/ │ │ │ │ ├── agent.ts │ │ │ │ ├── cli.ts │ │ │ │ └── index.ts │ │ │ └── walkthrough/ │ │ │ ├── 02-agent.baml │ │ │ └── 02-tool_calculator.baml │ │ └── 03-tool-loop/ │ │ ├── .gitignore │ │ ├── README.md │ │ ├── baml_src/ │ │ │ ├── agent.baml │ │ │ ├── clients.baml │ │ │ ├── generators.baml │ │ │ └── tool_calculator.baml │ │ ├── src/ │ │ │ ├── agent.ts │ │ │ ├── cli.ts │ │ │ └── index.ts │ │ └── walkthrough/ │ │ ├── 03-agent.ts │ │ └── 03b-agent.ts │ ├── walkthrough/ │ │ ├── 00-.gitignore │ │ ├── 00-index.ts 
│ │ ├── 00-package.json │ │ ├── 00-tsconfig.json │ │ ├── 01-agent.baml │ │ ├── 01-agent.ts │ │ ├── 01-cli.ts │ │ ├── 01-index.ts │ │ ├── 02-agent.baml │ │ ├── 02-tool_calculator.baml │ │ ├── 03-agent.ts │ │ ├── 03b-agent.ts │ │ ├── 04-agent.baml │ │ ├── 04b-agent.baml │ │ ├── 04c-agent.baml │ │ ├── 05-agent.baml │ │ ├── 05-agent.ts │ │ ├── 05-cli.ts │ │ ├── 05b-agent.baml │ │ ├── 05c-agent.baml │ │ ├── 06-agent.baml │ │ ├── 07-agent.ts │ │ ├── 07b-agent.ts │ │ ├── 07c-agent.baml │ │ ├── 08-server.ts │ │ ├── 09-server.ts │ │ ├── 09-state.ts │ │ ├── 10-agent.ts │ │ ├── 10-server.ts │ │ ├── 11-cli.ts │ │ ├── 11b-cli.ts │ │ ├── 11c-cli.ts │ │ ├── 12-1-server-init.ts │ │ ├── 12-server.ts │ │ ├── 12a-server.ts │ │ ├── 12aa-server.ts │ │ └── 12b-server.ts │ ├── walkthrough.md │ └── walkthrough.yaml └── 2025-07-16/ ├── .gitignore ├── CLAUDE.md ├── hack/ │ ├── analyze_log_capture.py │ ├── inspect_notebook.py │ ├── minimal_test.ipynb │ ├── test_log_capture.sh │ └── testing.md ├── pyproject.toml ├── test_notebook_colab_sim.sh ├── walkthrough/ │ ├── 00-.gitignore │ ├── 00-main.py │ ├── 00-package.json │ ├── 00-tsconfig.json │ ├── 01-agent.baml │ ├── 01-agent.py │ ├── 01-main.py │ ├── 02-agent.baml │ ├── 02-main.py │ ├── 02-tool_calculator.baml │ ├── 03-agent.py │ ├── 03-main.py │ ├── 03b-agent.py │ ├── 03b-agent.ts │ ├── 04-agent.baml │ ├── 04b-agent.baml │ ├── 04c-agent.baml │ ├── 05-agent.baml │ ├── 05-agent.py │ ├── 05-main.py │ ├── 05b-agent.baml │ ├── 05c-agent.baml │ ├── 06-agent.baml │ ├── 07-agent.py │ ├── 07-main.py │ ├── 07b-agent.ts │ ├── 07c-agent.baml │ ├── 08-server.ts │ ├── 09-server.ts │ ├── 09-state.ts │ ├── 10-agent.ts │ ├── 10-server.ts │ ├── 11-cli.ts │ ├── 11b-cli.ts │ ├── 11c-cli.ts │ ├── 12-1-server-init.ts │ ├── 12-server.ts │ ├── 12a-server.ts │ ├── 12aa-server.ts │ └── 12b-server.ts ├── walkthrough.yaml ├── walkthrough_python_enhanced.yaml ├── walkthroughgen_py.py └── workshop_final.ipynb ================================================ FILE CONTENTS 
================================================ ================================================ FILE: .gitignore ================================================ .promptx ================================================ FILE: CLAUDE.md ================================================ # AI Assistant Instructions **IMPORTANT: Copy or merge this file into your project's CLAUDE.md file to activate agent personas.** ## 🚨 MANDATORY PERSONA SELECTION **CRITICAL: You MUST adopt one of the specialized personas before proceeding with any work.** **BEFORE DOING ANYTHING ELSE**, you must read and adopt one of these personas: 1. **Developer Agent** - Read `.promptx/personas/agent-developer.md` - For coding, debugging, and implementation tasks 2. **Code Reviewer Agent** - Read `.promptx/personas/agent-code-reviewer.md` - For reviewing code changes and quality assurance 3. **Rebaser Agent** - Read `.promptx/personas/agent-rebaser.md` - For cleaning git history and rebasing changes 4. **Merger Agent** - Read `.promptx/personas/agent-merger.md` - For merging code across branches 5. **Multiplan Manager Agent** - Read `.promptx/personas/agent-multiplan-manager.md` - For orchestrating parallel work and creating plans **DO NOT PROCEED WITHOUT SELECTING A PERSONA.** Each persona has specific rules, workflows, and tools that you MUST follow exactly. 
## How to Choose Your Persona - **Asked to write code, fix bugs, or implement features?** → Use Developer Agent - **Asked to review code changes?** → Use Code Reviewer Agent - **Asked to clean git history or rebase changes?** → Use Rebaser Agent - **Asked to merge branches or consolidate work?** → Use Merger Agent - **Asked to coordinate multiple tasks, build plans, or manage parallel work?** → Use Multiplan Manager Agent ## Project Context [CUSTOMIZE THIS SECTION FOR YOUR PROJECT] This project uses: - **Language/Framework**: [Add your stack here] - **Build Tool**: [Add your build commands] - **Testing**: [Add your test commands] - **Architecture**: [Describe your project structure] ## Core Principles (All Personas) 1. **READ FIRST**: Always read at least 1500 lines to understand context fully 2. **DELETE MORE THAN YOU ADD**: Complexity compounds into disasters 3. **FOLLOW EXISTING PATTERNS**: Don't invent new approaches 4. **BUILD AND TEST**: Run your build and test commands after changes 5. 
**COMMIT FREQUENTLY**: Every 5-10 minutes for meaningful progress ## File Structure Reference [CUSTOMIZE THIS SECTION FOR YOUR PROJECT] ``` ./ ├── package.json # [or your dependency file] ├── src/ # [your source directory] │ ├── [your modules] │ └── [your files] ├── test/ # [your test directory] ├── .promptx/ # Agent personas (created by promptx init) │ └── personas/ └── CLAUDE.md # This file (after merging) ``` ## Common Commands (All Personas) [CUSTOMIZE THIS SECTION FOR YOUR PROJECT] ```bash # Build project [your build command] # Run tests [your test command] # Lint code [your lint command] # Deploy locally [your deploy command] ``` ## CRITICAL REMINDER **You CANNOT proceed without adopting a persona.** Each persona has: - Specific workflows and rules - Required tools and commands - Success criteria and verification steps - Commit and progress requirements **Choose your persona now and follow its instructions exactly.** --- *Generated by promptx - Agent personas are in .promptx/personas/* ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. 
For the purposes of control, an entity is assumed to be under the control of another entity if the entity that controls the other entity owns directly or indirectly fifty percent (50%) or more of the outstanding shares, or if there is some other contractual arrangement whereby the first entity effectively controls the management decisions of the other entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (which shall not include Communication that is clearly marked or otherwise designated in writing by the copyright owner as "Not a Contribution"). "Derivative Works" shall mean any work, whether in Source or Object form, that is based upon (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and separate works based upon the Work. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is clearly marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to use, reproduce, modify, merge, publish, distribute, sublicense, and/or sell copies of the Work, and to permit persons to whom the Work is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Work. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. 
If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, trademark, patent, attribution and other notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. 
You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright notice to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Support. You can choose to offer, and charge a fee for, warranty, support, indemnity or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or support. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in comments for the particular file format. (We recommend that you include a file named COPYING in your distribution, which contains the complete text of the license.) Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: Makefile ================================================ # Makefile for launch compatibility .PHONY: setup teardown setup: @echo "Setting up project..." @npm install || bun install || yarn install @echo "Setup complete!" teardown: @echo "Tearing down project..." @rm -rf node_modules @echo "Teardown complete!" ================================================ FILE: README.md ================================================ # 12-Factor Agents - Principles for building reliable LLM applications
Code License: Apache 2.0 Content License: CC BY-SA 4.0 Discord Server YouTube
Deep Dive YouTube
Deep Dive

*In the spirit of [12 Factor Apps](https://12factor.net/)*. *The source for this project is public at https://github.com/humanlayer/12-factor-agents, and I welcome your feedback and contributions. Let's figure this out together!* > [!TIP] > Missed the AI Engineer World's Fair? [Catch the talk here](https://www.youtube.com/watch?v=8kMaTybvDUw) > > Looking for Context Engineering? [Jump straight to factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) > > Want to contribute to `npx/uvx create-12-factor-agent`? Check out [the discussion thread](https://github.com/humanlayer/12-factor-agents/discussions/61) Screenshot 2025-04-03 at 2 49 07 PM Hi, I'm Dex. I've been [hacking](https://youtu.be/8bIHcttkOTE) on [AI agents](https://theouterloop.substack.com) for [a while](https://humanlayer.dev). **I've tried every agent framework out there**, from the plug-and-play crew/langchains to the "minimalist" smolagents of the world to the "production grade" langgraph, griptape, etc. **I've talked to a lot of really strong founders**, in and out of YC, who are all building really impressive things with AI. Most of them are rolling the stack themselves. I don't see a lot of frameworks in production customer-facing agents. **I've been surprised to find** that most of the products out there billing themselves as "AI Agents" are not all that agentic. A lot of them are mostly deterministic code, with LLM steps sprinkled in at just the right points to make the experience truly magical. Agents, at least the good ones, don't follow the ["here's your prompt, here's a bag of tools, loop until you hit the goal"](https://www.anthropic.com/engineering/building-effective-agents#agents) pattern. Rather, they are composed mostly of just software. 
So, I set out to answer: > ### **What are the principles we can use to build LLM-powered software that is actually good enough to put in the hands of production customers?** Welcome to 12-factor agents. As every Chicago mayor since Daley has consistently plastered all over the city's major airports, we're glad you're here. *Special thanks to [@iantbutler01](https://github.com/iantbutler01), [@tnm](https://github.com/tnm), [@hellovai](https://www.github.com/hellovai), [@stantonk](https://www.github.com/stantonk), [@balanceiskey](https://www.github.com/balanceiskey), [@AdjectiveAllison](https://www.github.com/AdjectiveAllison), [@pfbyjy](https://www.github.com/pfbyjy), [@a-churchill](https://www.github.com/a-churchill), and the SF MLOps community for early feedback on this guide.* ## The Short Version: The 12 Factors Even if LLMs [continue to get exponentially more powerful](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md#what-if-llms-get-smarter), there will be core engineering techniques that make LLM-powered software more reliable, more scalable, and easier to maintain. 
- [How We Got Here: A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) - [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) - [Factor 2: Own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) - [Factor 3: Own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) - [Factor 4: Tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) - [Factor 5: Unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) - [Factor 6: Launch/Pause/Resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) - [Factor 7: Contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) - [Factor 8: Own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) - [Factor 9: Compact Errors into Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md) - [Factor 10: Small, Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md) - [Factor 11: Trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) - [Factor 12: Make your agent a stateless reducer](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md) ### Visual Nav | | | | |----|----|-----| |[![factor 
1](https://github.com/humanlayer/12-factor-agents/blob/main/img/110-natural-language-tool-calls.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) | [![factor 2](https://github.com/humanlayer/12-factor-agents/blob/main/img/120-own-your-prompts.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) | [![factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/img/130-own-your-context-building.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) | |[![factor 4](https://github.com/humanlayer/12-factor-agents/blob/main/img/140-tools-are-just-structured-outputs.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) | [![factor 5](https://github.com/humanlayer/12-factor-agents/blob/main/img/150-unify-state.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) | [![factor 6](https://github.com/humanlayer/12-factor-agents/blob/main/img/160-pause-resume-with-simple-apis.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) | | [![factor 7](https://github.com/humanlayer/12-factor-agents/blob/main/img/170-contact-humans-with-tools.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) | [![factor 8](https://github.com/humanlayer/12-factor-agents/blob/main/img/180-control-flow.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) | [![factor 9](https://github.com/humanlayer/12-factor-agents/blob/main/img/190-factor-9-errors-static.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md) | | [![factor 
10](https://github.com/humanlayer/12-factor-agents/blob/main/img/1a0-small-focused-agents.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md) | [![factor 11](https://github.com/humanlayer/12-factor-agents/blob/main/img/1b0-trigger-from-anywhere.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) | [![factor 12](https://github.com/humanlayer/12-factor-agents/blob/main/img/1c0-stateless-reducer.png)](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md) | ## How we got here For a deeper dive on my agent journey and what led us here, check out [A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) - a quick summary here: ### The promise of agents We're gonna talk a lot about Directed Graphs (DGs) and their Acyclic friends, DAGs. I'll start by pointing out that...well...software is a directed graph. There's a reason we used to represent programs as flow charts. ![010-software-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/010-software-dag.png) ### From code to DAGs Around 20 years ago, we started to see DAG orchestrators become popular. We're talking classics like [Airflow](https://airflow.apache.org/), [Prefect](https://www.prefect.io/), some predecessors, and some newer ones like ([dagster](https://dagster.io/), [inngest](https://www.inngest.com/), [windmill](https://www.windmill.dev/)). These followed the same graph pattern, with the added benefit of observability, modularity, retries, administration, etc.
![015-dag-orchestrators](https://github.com/humanlayer/12-factor-agents/blob/main/img/015-dag-orchestrators.png) ### The promise of agents I'm not the first [person to say this](https://youtu.be/Dc99-zTMyMg?si=bcT0hIwWij2mR-40&t=73), but my biggest takeaway when I started learning about agents, was that you get to throw the DAG away. Instead of software engineers coding each step and edge case, you can give the agent a goal and a set of transitions: ![025-agent-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/025-agent-dag.png) And let the LLM make decisions in real time to figure out the path ![026-agent-dag-lines](https://github.com/humanlayer/12-factor-agents/blob/main/img/026-agent-dag-lines.png) The promise here is that you write less software, you just give the LLM the "edges" of the graph and let it figure out the nodes. You can recover from errors, you can write less code, and you may find that LLMs find novel solutions to problems. ### Agents as loops As we'll see later, it turns out this doesn't quite work. Let's dive one step deeper - with agents you've got this loop consisting of 3 steps: 1. LLM determines the next step in the workflow, outputting structured json ("tool calling") 2. Deterministic code executes the tool call 3. The result is appended to the context window 4. Repeat until the next step is determined to be "done" ```python initial_event = {"message": "..."} context = [initial_event] while True: next_step = await llm.determine_next_step(context) context.append(next_step) if (next_step.intent === "done"): return next_step.final_answer result = await execute_step(next_step) context.append(result) ``` Our initial context is just the starting event (maybe a user message, maybe a cron fired, maybe a webhook, etc), and we ask the llm to choose the next step (tool) or to determine that we're done. 
Here's a multi-step example: [![027-agent-loop-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif)](https://github.com/user-attachments/assets/3beb0966-fdb1-4c12-a47f-ed4e8240f8fd)
GIF Version ![027-agent-loop-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif)
## Why 12-factor agents? At the end of the day, this approach just doesn't work as well as we want it to. In building HumanLayer, I've talked to at least 100 SaaS builders (mostly technical founders) looking to make their existing product more agentic. The journey usually goes something like: 1. Decide you want to build an agent 2. Product design, UX mapping, what problems to solve 3. Want to move fast, so grab $FRAMEWORK and *get to building* 4. Get to 70-80% quality bar 5. Realize that 80% isn't good enough for most customer-facing features 6. Realize that getting past 80% requires reverse-engineering the framework, prompts, flow, etc. 7. Start over from scratch
Random Disclaimers **DISCLAIMER**: I'm not sure the exact right place to say this, but here seems as good as any: **this is BY NO MEANS meant to be a dig on either the many frameworks out there, or the pretty dang smart people who work on them**. They enable incredible things and have accelerated the AI ecosystem. I hope that one outcome of this post is that agent framework builders can learn from the journeys of myself and others, and make frameworks even better. Especially for builders who want to move fast but need deep control. **DISCLAIMER 2**: I'm not going to talk about MCP. I'm sure you can see where it fits in. **DISCLAIMER 3**: I'm using mostly typescript, for [reasons](https://www.linkedin.com/posts/dexterihorthy_llms-typescript-aiagents-activity-7290858296679313408-Lh9e?utm_source=share&utm_medium=member_desktop&rcm=ACoAAA4oHTkByAiD-wZjnGsMBUL_JT6nyyhOh30) but all this stuff works in python or any other language you prefer. Anyways back to the thing...
### Design Patterns for great LLM applications After digging through hundreds of AI libraries and working with dozens of founders, my instinct is this: 1. There are some core things that make agents great 2. Going all in on a framework and building what is essentially a greenfield rewrite may be counter-productive 3. There are some core principles that make agents great, and you will get most/all of them if you pull in a framework 4. BUT, the fastest way I've seen for builders to get high-quality AI software in the hands of customers is to take small, modular concepts from agent building, and incorporate them into their existing product 5. These modular concepts from agents can be defined and applied by most skilled software engineers, even if they don't have an AI background > #### The fastest way I've seen for builders to get good AI software in the hands of customers is to take small, modular concepts from agent building, and incorporate them into their existing product ## The 12 Factors (again) - [How We Got Here: A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) - [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) - [Factor 2: Own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) - [Factor 3: Own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) - [Factor 4: Tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) - [Factor 5: Unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) - [Factor 6: Launch/Pause/Resume with simple
APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) - [Factor 7: Contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) - [Factor 8: Own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) - [Factor 9: Compact Errors into Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md) - [Factor 10: Small, Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md) - [Factor 11: Trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) - [Factor 12: Make your agent a stateless reducer](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md) ## Honorable Mentions / other advice - [Factor 13: Pre-fetch all the context you might need](https://github.com/humanlayer/12-factor-agents/blob/main/content/appendix-13-pre-fetch.md) ## Related Resources - Contribute to this guide [here](https://github.com/humanlayer/12-factor-agents) - [I talked about a lot of this on an episode of the Tool Use podcast](https://youtu.be/8bIHcttkOTE) in March 2025 - I write about some of this stuff at [The Outer Loop](https://theouterloop.substack.com) - I do [webinars about Maximizing LLM Performance](https://github.com/hellovai/ai-that-works/tree/main) with [@hellovai](https://github.com/hellovai) - We build OSS agents with this methodology under [got-agents/agents](https://github.com/got-agents/agents) - We ignored all our own advice and built a [framework for running distributed agents in kubernetes](https://github.com/humanlayer/kubechain) - Other links from this guide: - [12 Factor Apps](https://12factor.net) - [Building Effective Agents 
(Anthropic)](https://www.anthropic.com/engineering/building-effective-agents#agents) - [Prompts are Functions](https://thedataexchange.media/baml-revolution-in-ai-engineering/ ) - [Library patterns: Why frameworks are evil](https://tomasp.net/blog/2015/library-frameworks/) - [The Wrong Abstraction](https://sandimetz.com/blog/2016/1/20/the-wrong-abstraction) - [Mailcrew Agent](https://github.com/dexhorthy/mailcrew) - [Mailcrew Demo Video](https://www.youtube.com/watch?v=f_cKnoPC_Oo) - [Chainlit Demo](https://x.com/chainlit_io/status/1858613325921480922) - [TypeScript for LLMs](https://www.linkedin.com/posts/dexterihorthy_llms-typescript-aiagents-activity-7290858296679313408-Lh9e) - [Schema Aligned Parsing](https://www.boundaryml.com/blog/schema-aligned-parsing) - [Function Calling vs Structured Outputs vs JSON Mode](https://www.vellum.ai/blog/when-should-i-use-function-calling-structured-outputs-or-json-mode) - [BAML on GitHub](https://github.com/boundaryml/baml) - [OpenAI JSON vs Function Calling](https://docs.llamaindex.ai/en/stable/examples/llm/openai_json_vs_function_calling/) - [Outer Loop Agents](https://theouterloop.substack.com/p/openais-realtime-api-is-a-step-towards) - [Airflow](https://airflow.apache.org/) - [Prefect](https://www.prefect.io/) - [Dagster](https://dagster.io/) - [Inngest](https://www.inngest.com/) - [Windmill](https://www.windmill.dev/) - [The AI Agent Index (MIT)](https://aiagentindex.mit.edu/) - [NotebookLM on Finding Model Capability Boundaries](https://open.substack.com/pub/swyx/p/notebooklm?selection=08e1187c-cfee-4c63-93c9-71216640a5f8) ## Contributors Thanks to everyone who has contributed to 12-factor agents! 
[dexhorthy](https://github.com/dexhorthy) [Sypherd](https://github.com/Sypherd) [tofaramususa](https://github.com/tofaramususa) [a-churchill](https://github.com/a-churchill) [Elijas](https://github.com/Elijas) [hugolmn](https://github.com/hugolmn) [jeremypeters](https://github.com/jeremypeters) [kndl](https://github.com/kndl) [maciejkos](https://github.com/maciejkos) [pfbyjy](https://github.com/pfbyjy) [0xRaduan](https://github.com/0xRaduan) [zyuanlim](https://github.com/zyuanlim) [lombardo-chcg](https://github.com/lombardo-chcg) [sahanatvessel](https://github.com/sahanatvessel) ## License All content and images are licensed under a CC BY-SA 4.0 License Code is licensed under the Apache 2.0 License ================================================ FILE: content/appendix-13-pre-fetch.md ================================================ ### Factor 13 - pre-fetch all the context you might need If there's a high chance that your model will call tool X, don't waste token round trips telling the model to fetch it, that is, instead of a pseudo-prompt like: ```jinja When looking at deployments, you will likely want to fetch the list of published git tags, so you can use it to deploy to prod. Here's what happened so far: {{ thread.events }} What's the next step? Answer in JSON format with one of the following intents: { intent: 'deploy_backend_to_prod', tag: string } OR { intent: 'list_git_tags' } OR { intent: 'done_for_now', message: string } ``` and your code looks like ```python thread = {"events": [initial_message]} next_step = await determine_next_step(thread) while True: switch next_step.intent: case 'list_git_tags': tags = await fetch_git_tags() thread["events"].append({ type: 'list_git_tags', data: tags, }) case 'deploy_backend_to_prod': deploy_result = await deploy_backend_to_prod(next_step.data.tag) thread["events"].append({ "type": 'deploy_backend_to_prod', "data": deploy_result, }) case 'done_for_now': await notify_human(next_step.message) break # ... 
``` You might as well just fetch the tags and include them in the context window, like: ```diff - When looking at deployments, you will likely want to fetch the list of published git tags, - so you can use it to deploy to prod. + The current git tags are: + {{ git_tags }} Here's what happened so far: {{ thread.events }} What's the next step? Answer in JSON format with one of the following intents: { intent: 'deploy_backend_to_prod', tag: string - } OR { - intent: 'list_git_tags' } OR { intent: 'done_for_now', message: string } ``` and your code looks like ```diff thread = {"events": [initial_message]} + git_tags = await fetch_git_tags() - next_step = await determine_next_step(thread) + next_step = await determine_next_step(thread, git_tags) while True: switch next_step.intent: - case 'list_git_tags': - tags = await fetch_git_tags() - thread["events"].append({ - type: 'list_git_tags', - data: tags, - }) case 'deploy_backend_to_prod': deploy_result = await deploy_backend_to_prod(next_step.data.tag) thread["events"].append({ "type": 'deploy_backend_to_prod', "data": deploy_result, }) case 'done_for_now': await notify_human(next_step.message) break # ... ``` or even just include the tags in the thread and remove the specific parameter from your prompt template: ```diff thread = {"events": [initial_message]} + # add the request + thread["events"].append({ + "type": 'list_git_tags', + }) git_tags = await fetch_git_tags() + # add the result + thread["events"].append({ + "type": 'list_git_tags_result', + "data": git_tags, + }) - next_step = await determine_next_step(thread, git_tags) + next_step = await determine_next_step(thread) while True: switch next_step.intent: case 'deploy_backend_to_prod': deploy_result = await deploy_backend_to_prod(next_step.data.tag) thread["events"].append(deploy_result) case 'done_for_now': await notify_human(next_step.message) break # ... 
``` Overall: > #### If you already know what tools you'll want the model to call, just call them DETERMINISTICALLY and let the model do the hard part of figuring out how to use their outputs Again, AI engineering is all about [Context Engineering](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md). [← Stateless Reducer](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md) | [Further Reading →](https://github.com/humanlayer/12-factor-agents/blob/main/README.md#related-resources) ================================================ FILE: content/brief-history-of-software.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ## The longer version: how we got here ### You don't have to listen to me Whether you're new to agents or an ornery old veteran like me, I'm going to try to convince you to throw out most of what you think about AI Agents, take a step back, and rethink them from first principles. (spoiler alert if you didn't catch the OpenAI responses launch a few weeks back, but pushing MORE agent logic behind an API ain't it) ## Agents are software, and a brief history thereof let's talk about how we got here ### 60 years ago We're gonna talk a lot about Directed Graphs (DGs) and their Acyclic friends, DAGs. I'll start by pointing out that...well...software is a directed graph. There's a reason we used to represent programs as flow charts. ![010-software-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/010-software-dag.png) ### 20 years ago Around 20 years ago, we started to see DAG orchestrators become popular. We're talking classics like [Airflow](https://airflow.apache.org/), [Prefect](https://www.prefect.io/), some predecessors, and some newer ones like ([dagster](https://dagster.io/), [inngest](https://www.inngest.com/), [windmill](https://www.windmill.dev/)).
These followed the same graph pattern, with the added benefit of observability, modularity, retries, administration, etc. ![015-dag-orchestrators](https://github.com/humanlayer/12-factor-agents/blob/main/img/015-dag-orchestrators.png) ### 10-15 years ago When ML models started to get good enough to be useful, we started to see DAGs with ML models sprinkled in. You might imagine steps like "summarize the text in this column into a new column" or "classify the support issues by severity or sentiment". ![020-dags-with-ml](https://github.com/humanlayer/12-factor-agents/blob/main/img/020-dags-with-ml.png) But at the end of the day, it's still mostly the same good old deterministic software. ### The promise of agents I'm not the first [person to say this](https://youtu.be/Dc99-zTMyMg?si=bcT0hIwWij2mR-40&t=73), but my biggest takeaway when I started learning about agents, was that you get to throw the DAG away. Instead of software engineers coding each step and edge case, you can give the agent a goal and a set of transitions: ![025-agent-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/025-agent-dag.png) And let the LLM make decisions in real time to figure out the path ![026-agent-dag-lines](https://github.com/humanlayer/12-factor-agents/blob/main/img/026-agent-dag-lines.png) The promise here is that you write less software, you just give the LLM the "edges" of the graph and let it figure out the nodes. You can recover from errors, you can write less code, and you may find that LLMs find novel solutions to problems. ### Agents as loops Put another way, you've got this loop consisting of 3 steps: 1. LLM determines the next step in the workflow, outputting structured json ("tool calling") 2. Deterministic code executes the tool call 3. The result is appended to the context window 4. 
repeat until the next step is determined to be "done" ```python initial_event = {"message": "..."} context = [initial_event] while True: next_step = await llm.determine_next_step(context) context.append(next_step) if (next_step.intent === "done"): return next_step.final_answer result = await execute_step(next_step) context.append(result) ``` Our initial context is just the starting event (maybe a user message, maybe a cron fired, maybe a webhook, etc), and we ask the llm to choose the next step (tool) or to determine that we're done. Here's a multi-step example: [![027-agent-loop-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif)](https://github.com/user-attachments/assets/3beb0966-fdb1-4c12-a47f-ed4e8240f8fd)
GIF Version ![027-agent-loop-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-animation.gif)
And the "materialized" DAG that was generated would look something like: ![027-agent-loop-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/027-agent-loop-dag.png) ### The problem with this "loop until you solve it" pattern The biggest problems with this pattern: - Agents get lost when the context window gets too long - they spin out trying the same broken approach over and over again - literally that's it, but that's enough to kneecap the approach Even if you haven't hand-rolled an agent, you've probably seen this long-context problem in working with agentic coding tools. They just get lost after a while and you need to start a new chat. I'll even perhaps posit something I've heard in passing quite a bit, and that YOU probably have developed your own intuition around: > ### **Even as models support longer and longer context windows, you'll ALWAYS get better results with a small, focused prompt and context** Most builders I've talked to **pushed the "tool calling loop" idea to the side** when they realized that anything more than 10-20 turns becomes a big mess that the LLM can't recover from. Even if the agent gets it right 90% of the time, that's miles away from "good enough to put in customer hands". Can you imagine a web app that crashed on 10% of page loads? **Update 2025-06-09** - I really like how [@swyx](https://x.com/swyx/status/1932125643384455237) put this: Screenshot 2025-07-02 at 11 50 50 AM ### What actually works - micro agents One thing that I **have** seen in the wild quite a bit is taking the agent pattern and sprinkling it into a broader more deterministic DAG. ![micro-agent-dag](https://github.com/humanlayer/12-factor-agents/blob/main/img/028-micro-agent-dag.png) You might be asking - "why use agents at all in this case?"
- we'll get into that shortly, but basically, having language models managing well-scoped sets of tasks makes it easy to incorporate live human feedback, translating it into workflow steps without spinning out into context error loops. ([factor 1](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md), [factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) [factor 7](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md)). > #### having language models managing well-scoped sets of tasks makes it easy to incorporate live human feedback...without spinning out into context error loops ### A real life micro agent Here's an example of how deterministic code might run one micro agent responsible for handling the human-in-the-loop steps for deployment. ![029-deploybot-high-level](https://github.com/humanlayer/12-factor-agents/blob/main/img/029-deploybot-high-level.png) * **Human** Merges PR to GitHub main branch * **Deterministic Code** Deploys to staging env * **Deterministic Code** Runs end-to-end (e2e) tests against staging * **Deterministic Code** Hands to agent for prod deployment, with initial context: "deploy SHA 4af9ec0 to production" * **Agent** calls `deploy_frontend_to_prod(4af9ec0)` * **Deterministic code** requests human approval on this action * **Human** Rejects the action with feedback "can you deploy the backend first?" 
* **Agent** calls `deploy_backend_to_prod(4af9ec0)` * **Deterministic code** requests human approval on this action * **Human** approves the action * **Deterministic code** executed the backend deployment * **Agent** calls `deploy_frontend_to_prod(4af9ec0)` * **Deterministic code** requests human approval on this action * **Human** approves the action * **Deterministic code** executed the frontend deployment * **Agent** determines that the task was completed successfully, we're done! * **Deterministic code** run the end-to-end tests against production * **Deterministic code** task completed, OR pass to rollback agent to review failures and potentially roll back [![033-deploybot-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/033-deploybot.gif)](https://github.com/user-attachments/assets/deb356e9-0198-45c2-9767-231cb569ae13)
GIF Version ![033-deploybot-animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/033-deploybot.gif)
This example is based on a real life [OSS agent we've shipped to manage our deployments at HumanLayer](https://github.com/got-agents/agents/tree/main/deploybot-ts) - here is a real conversation I had with it last week: ![035-deploybot-conversation](https://github.com/humanlayer/12-factor-agents/blob/main/img/035-deploybot-conversation.png) We haven't given this agent a huge pile of tools or tasks. The primary value in the LLM is parsing the human's plaintext feedback and proposing an updated course of action. We isolate tasks and contexts as much as possible to keep the LLM focused on a small, 5-10 step workflow. Here's another [more classic support / chatbot demo](https://x.com/chainlit_io/status/1858613325921480922). ### So what's an agent really? - **prompt** - tell an LLM how to behave, and what "tools" it has available. The output of the prompt is a JSON object that describes the next step in the workflow (the "tool call" or "function call"). ([factor 2](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md)) - **switch statement** - based on the JSON that the LLM returns, decide what to do with it. (part of [factor 8](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md)) - **accumulated context** - store the list of steps that have happened and their results ([factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)) - **for loop** - until the LLM emits some sort of "Terminal" tool call (or plaintext response), add the result of the switch statement to the context window and ask the LLM to choose the next step.
([factor 8](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md)) ![040-4-components](https://github.com/humanlayer/12-factor-agents/blob/main/img/040-4-components.png) In the "deploybot" example, we gain a couple benefits from owning the control flow and context accumulation: - In our **switch statement** and **for loop**, we can hijack control flow to pause for human input or to wait for completion of long-running tasks - We can trivially serialize the **context** window for pause+resume - In our **prompt**, we can optimize the heck out of how we pass instructions and "what happened so far" to the LLM [Part II](https://github.com/humanlayer/12-factor-agents/blob/main/README.md#12-factor-agents) will **formalize these patterns** so they can be applied to add impressive AI features to any software project, without needing to go all in on conventional implementations/definitions of "AI agent". [Factor 1 - Natural Language to Tool Calls →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) ================================================ FILE: content/factor-01-natural-language-to-tool-calls.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 1. Natural Language to Tool Calls One of the most common patterns in agent building is to convert natural language to structured tool calls. This is a powerful pattern that allows you to build agents that can reason about tasks and execute them. ![110-natural-language-tool-calls](https://github.com/humanlayer/12-factor-agents/blob/main/img/110-natural-language-tool-calls.png) This pattern, when applied atomically, is the simple translation of a phrase like > can you create a payment link for $750 to Terri for sponsoring the february AI tinkerers meetup? 
to a structured object that describes a Stripe API call like ```json { "function": { "name": "create_payment_link", "parameters": { "amount": 750, "customer": "cust_128934ddasf9", "product": "prod_8675309", "price": "prc_09874329fds", "quantity": 1, "memo": "Hey Jeff - see below for the payment link for the february ai tinkerers meetup" } } } ``` **Note**: in reality the stripe API is a bit more complex, a [real agent that does this](https://github.com/dexhorthy/mailcrew) ([video](https://www.youtube.com/watch?v=f_cKnoPC_Oo)) would list customers, list products, list prices, etc to build this payload with the proper ids, or include those ids in the prompt/context window (we'll see below how those are kinda the same thing though!) From there, deterministic code can pick up the payload and do something with it. (More on this in [factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md)) ```python # The LLM takes natural language and returns a structured object nextStep = await llm.determineNextStep( """ create a payment link for $750 to Jeff for sponsoring the february AI tinkerers meetup """ ) # Handle the structured output based on its function if nextStep.function == 'create_payment_link': stripe.paymentlinks.create(nextStep.parameters) return # or whatever you want, see below elif nextStep.function == 'something_else': # ... more cases pass else: # the model didn't call a tool we know about # do something else pass ``` **NOTE**: While a full agent would then receive the API call result and loop with it, eventually returning something like > I've successfully created a payment link for $750 to Terri for sponsoring the february AI tinkerers meetup. Here's the link: https://buy.stripe.com/test_1234567890 **Instead**, We're actually going to skip that step here, and save it for another factor, which you may or may not want to also incorporate (up to you!) 
[← How We Got Here](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) | [Own Your Prompts →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) ================================================ FILE: content/factor-02-own-your-prompts.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 2. Own your prompts Don't outsource your prompt engineering to a framework. ![120-own-your-prompts](https://github.com/humanlayer/12-factor-agents/blob/main/img/120-own-your-prompts.png) By the way, [this is far from novel advice:](https://hamel.dev/blog/posts/prompt/) ![image](https://github.com/user-attachments/assets/575bab37-0f96-49fb-9ce3-9a883cdd420b) Some frameworks provide a "black box" approach like this: ```python agent = Agent( role="...", goal="...", personality="...", tools=[tool1, tool2, tool3] ) task = Task( instructions="...", expected_output=OutputModel ) result = agent.run(task) ``` This is great for pulling in some TOP NOTCH prompt engineering to get you started, but it is often difficult to tune and/or reverse engineer to get exactly the right tokens into your model. Instead, own your prompts and treat them as first-class code: ```rust function DetermineNextStep(thread: string) -> DoneForNow | ListGitTags | DeployBackend | DeployFrontend | RequestMoreInformation { prompt #" {{ _.role("system") }} You are a helpful assistant that manages deployments for frontend and backend systems. You work diligently to ensure safe and successful deployments by following best practices and proper deployment procedures. Before deploying any system, you should check: - The deployment environment (staging vs production) - The correct tag/version to deploy - The current system status You can use tools like deploy_backend, deploy_frontend, and check_deployment_status to manage deployments. 
For sensitive deployments, use request_approval to get human verification. Always think about what to do first, like: - Check current deployment status - Verify the deployment tag exists - Request approval if needed - Deploy to staging before production - Monitor deployment progress {{ _.role("user") }} {{ thread }} What should the next step be? "# } ``` (the above example uses [BAML](https://github.com/boundaryml/baml) to generate the prompt, but you can do this with any prompt engineering tool you want, or even just template it manually) If the signature looks a little funny, we'll get to that in [factor 4 - tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) ```typescript function DetermineNextStep(thread: string) -> DoneForNow | ListGitTags | DeployBackend | DeployFrontend | RequestMoreInformation { ``` Key benefits of owning your prompts: 1. **Full Control**: Write exactly the instructions your agent needs, no black box abstractions 2. **Testing and Evals**: Build tests and evals for your prompts just like you would for any other code 3. **Iteration**: Quickly modify prompts based on real-world performance 4. **Transparency**: Know exactly what instructions your agent is working with 5. **Role Hacking**: take advantage of APIs that support nonstandard usage of user/assistant roles - for example, the now-deprecated non-chat flavor of OpenAI "completions" API. This includes some so-called "model gaslighting" techniques Remember: Your prompts are the primary interface between your application logic and the LLM. Having full control over your prompts gives you the flexibility and prompt control you need for production-grade agents. I don't know what's the best prompt, but I know you want the flexibility to be able to try EVERYTHING. 
[← Natural Language To Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) | [Own Your Context Window →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) ================================================ FILE: content/factor-03-own-your-context-window.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 3. Own your context window You don't necessarily need to use standard message-based formats for conveying context to an LLM. > #### At any given point, your input to an LLM in an agent is "here's what's happened so far, what's the next step" Everything is context engineering. [LLMs are stateless functions](https://thedataexchange.media/baml-revolution-in-ai-engineering/) that turn inputs into outputs. To get the best outputs, you need to give them the best inputs. Creating great context means: - The prompt and instructions you give to the model - Any documents or external data you retrieve (e.g. RAG) - Any past state, tool calls, results, or other history - Any past messages or events from related but separate histories/conversations (Memory) - Instructions about what sorts of structured data to output ![image](https://github.com/user-attachments/assets/0f1f193f-8e94-4044-a276-576bd7764fd0) ### on context engineering This guide is all about getting as much as possible out of today's models. Notably not mentioned are: - Changes to model parameters like temperature, top_p, frequency_penalty, presence_penalty, etc. - Training your own completion or embedding models - Fine-tuning existing models Again, I don't know what's the best way to hand context to an LLM, but I know you want the flexibility to be able to try EVERYTHING.
#### Standard vs Custom Context Formats Most LLM clients use a standard message-based format like this: ```yaml [ { "role": "system", "content": "You are a helpful assistant..." }, { "role": "user", "content": "Can you deploy the backend?" }, { "role": "assistant", "content": null, "tool_calls": [ { "id": "1", "name": "list_git_tags", "arguments": "{}" } ] }, { "role": "tool", "name": "list_git_tags", "content": "{\"tags\": [{\"name\": \"v1.2.3\", \"commit\": \"abc123\", \"date\": \"2024-03-15T10:00:00Z\"}, {\"name\": \"v1.2.2\", \"commit\": \"def456\", \"date\": \"2024-03-14T15:30:00Z\"}, {\"name\": \"v1.2.1\", \"commit\": \"abe033d\", \"date\": \"2024-03-13T09:15:00Z\"}]}", "tool_call_id": "1" } ] ``` While this works great for most use cases, if you want to really get THE MOST out of today's LLMs, you need to get your context into the LLM in the most token- and attention-efficient way you can. As an alternative to the standard message-based format, you can build your own context format that's optimized for your use case. For example, you can use custom objects and pack/spread them into one or more user, system, assistant, or tool messages as makes sense. Here's an example of putting the whole context window into a single user message: ```yaml [ { "role": "system", "content": "You are a helpful assistant..." }, { "role": "user", "content": | Here's everything that happened so far: From: @alex Channel: #deployments Text: Can you deploy the backend? intent: "list_git_tags" tags: - name: "v1.2.3" commit: "abc123" date: "2024-03-15T10:00:00Z" - name: "v1.2.2" commit: "def456" date: "2024-03-14T15:30:00Z" - name: "v1.2.1" commit: "ghi789" date: "2024-03-13T09:15:00Z" what's the next step? } ] ``` The model may infer that you're asking it `what's the next step` by the tool schemas you supply, but it never hurts to roll it into your prompt template. 
### code example We can build this with something like: ```python class Thread: events: List[Event] class Event: # could just use string, or could be explicit - up to you type: Literal["list_git_tags", "deploy_backend", "deploy_frontend", "request_more_information", "done_for_now", "list_git_tags_result", "deploy_backend_result", "deploy_frontend_result", "request_more_information_result", "done_for_now_result", "error"] data: ListGitTags | DeployBackend | DeployFrontend | RequestMoreInformation | ListGitTagsResult | DeployBackendResult | DeployFrontendResult | RequestMoreInformationResult | string def event_to_prompt(event: Event) -> str: data = event.data if isinstance(event.data, str) \ else stringifyToYaml(event.data) return f"<{event.type}>\n{data}\n" def thread_to_prompt(thread: Thread) -> str: return '\n\n'.join(event_to_prompt(event) for event in thread.events) ``` #### Example Context Windows Here's how context windows might look with this approach: **Initial Slack Request:** ```xml From: @alex Channel: #deployments Text: Can you deploy the latest backend to production? ``` **After Listing Git Tags:** ```xml From: @alex Channel: #deployments Text: Can you deploy the latest backend to production? Thread: [] intent: "list_git_tags" tags: - name: "v1.2.3" commit: "abc123" date: "2024-03-15T10:00:00Z" - name: "v1.2.2" commit: "def456" date: "2024-03-14T15:30:00Z" - name: "v1.2.1" commit: "ghi789" date: "2024-03-13T09:15:00Z" ``` **After Error and Recovery:** ```xml From: @alex Channel: #deployments Text: Can you deploy the latest backend to production? Thread: [] intent: "deploy_backend" tag: "v1.2.3" environment: "production" error running deploy_backend: Failed to connect to deployment service intent: "request_more_information_from_human" question: "I had trouble connecting to the deployment service, can you provide more details and/or check on the status of the service?" 
data: response: "I'm not sure what's going on, can you check on the status of the latest workflow?" ``` From here your next step might be: ```python nextStep = await determine_next_step(thread_to_prompt(thread)) ``` ```python { "intent": "get_workflow_status", "workflow_name": "tag_push_prod.yaml", } ``` The XML-style format is just one example - the point is you can build your own format that makes sense for your application. You'll get better quality if you have the flexibility to experiment with different context structures and what you store vs. what you pass to the LLM. Key benefits of owning your context window: 1. **Information Density**: Structure information in ways that maximize the LLM's understanding 2. **Error Handling**: Include error information in a format that helps the LLM recover. Consider hiding errors and failed calls from context window once they are resolved. 3. **Safety**: Control what information gets passed to the LLM, filtering out sensitive data 4. **Flexibility**: Adapt the format as you learn what works best for your use case 5. **Token Efficiency**: Optimize context format for token efficiency and LLM understanding Context includes: prompts, instructions, RAG documents, history, tool calls, memory Remember: The context window is your primary interface with the LLM. Taking control of how you structure and present information can dramatically improve your agent's performance. Example - information density - same message, fewer tokens: ![Loom Screenshot 2025-04-22 at 09 00 56](https://github.com/user-attachments/assets/5cf041c6-72da-4943-be8a-99c73162b12a) ### Don't take it from me About 2 months after 12-factor agents was published, context engineering started to become a pretty popular term. Screenshot 2025-06-25 at 4 11 45 PM Screenshot 2025-06-25 at 4 12 59 PM There's also a quite good [Context Engineering Cheat Sheet](https://x.com/lenadroid/status/1943685060785524824) from [@lenadroid](https://x.com/lenadroid) from July 2025. 
image Recurring theme here: I don't know what's the best approach, but I know you want the flexibility to be able to try EVERYTHING. [← Own Your Prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) | [Tools Are Structured Outputs →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) ================================================ FILE: content/factor-04-tools-are-structured-outputs.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 4. Tools are just structured outputs Tools don't need to be complex. At their core, they're just structured output from your LLM that triggers deterministic code. ![140-tools-are-just-structured-outputs](https://github.com/humanlayer/12-factor-agents/blob/main/img/140-tools-are-just-structured-outputs.png) For example, let's say you have two tools `CreateIssue` and `SearchIssues`. To ask an LLM to "use one of several tools" is just to ask it to output JSON we can parse into an object representing those tools. ```python class Issue: title: str description: str team_id: str assignee_id: str class CreateIssue: intent: "create_issue" issue: Issue class SearchIssues: intent: "search_issues" query: str what_youre_looking_for: str ``` The pattern is simple: 1. LLM outputs structured JSON 2. Deterministic code executes the appropriate action (like calling an external API) 3. Results are captured and fed back into the context This creates a clean separation between the LLM's decision-making and your application's actions. The LLM decides what to do, but your code controls how it's done. Just because an LLM "called a tool" doesn't mean you have to go execute a specific corresponding function in the same way every time.
If you recall our switch statement from above ```python if nextStep.intent == 'create_payment_link': stripe.paymentlinks.create(nextStep.parameters) return # or whatever you want, see below elif nextStep.intent == 'wait_for_a_while': # do something monadic idk else: #... the model didn't call a tool we know about # do something else ``` **Note**: there has been a lot said about the benefits of "plain prompting" vs. "tool calling" vs. "JSON mode" and the performance tradeoffs of each. We'll link some resources to that stuff soon, but not gonna get into it here. See [Prompting vs JSON Mode vs Function Calling vs Constrained Generation vs SAP](https://www.boundaryml.com/blog/schema-aligned-parsing), [When should I use function calling, structured outputs, or JSON mode?](https://www.vellum.ai/blog/when-should-i-use-function-calling-structured-outputs-or-json-mode#:~:text=We%20don%27t%20recommend%20using%20JSON,always%20use%20Structured%20Outputs%20instead) and [OpenAI JSON vs Function Calling](https://docs.llamaindex.ai/en/stable/examples/llm/openai_json_vs_function_calling/). The "next step" might not be as atomic as just "run a pure function and return the result". You unlock a lot of flexibility when you think of "tool calls" as just a model outputting JSON describing what deterministic code should do. Put this together with [factor 8 own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md). [← Own Your Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) | [Unify Execution State →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) ================================================ FILE: content/factor-05-unify-execution-state.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 5. 
Unify execution state and business state Even outside the AI world, many infrastructure systems try to separate "execution state" from "business state". For AI apps, this might involve complex abstractions to track things like current step, next step, waiting status, retry counts, etc. This separation creates complexity that may be worthwhile, but may be overkill for your use case. As always, it's up to you to decide what's right for your application. But don't think you *have* to manage them separately. More clearly: - **Execution state**: current step, next step, waiting status, retry counts, etc. - **Business state**: What's happened in the agent workflow so far (e.g. list of OpenAI messages, list of tool calls and results, etc.) If possible, SIMPLIFY - unify these as much as possible. [![155-unify-state](https://github.com/humanlayer/12-factor-agents/blob/main/img/155-unify-state-animation.gif)](https://github.com/user-attachments/assets/e5a851db-f58f-43d8-8b0c-1926c99fc68d)
GIF Version ![155-unify-state](https://github.com/humanlayer/12-factor-agents/blob/main/img/155-unify-state-animation.gif)
In reality, you can engineer your application so that you can infer all execution state from the context window. In many cases, execution state (current step, waiting status, etc.) is just metadata about what has happened so far. You may have things that can't go in the context window, like session ids, password contexts, etc., but your goal should be to minimize those things. By embracing [factor 3](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) you can control what actually goes into the LLM. This approach has several benefits: 1. **Simplicity**: One source of truth for all state 2. **Serialization**: The thread is trivially serializable/deserializable 3. **Debugging**: The entire history is visible in one place 4. **Flexibility**: Easy to add new state by just adding new event types 5. **Recovery**: Can resume from any point by just loading the thread 6. **Forking**: Can fork the thread at any point by copying some subset of the thread into a new context / state ID 7. **Human Interfaces and Observability**: Trivial to convert a thread into a human-readable markdown or a rich Web app UI [← Tools Are Structured Outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) | [Launch/Pause/Resume →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) ================================================ FILE: content/factor-06-launch-pause-resume.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 6. Launch/Pause/Resume with simple APIs Agents are just programs, and we have things we expect from how to launch, query, resume, and stop them.
[![pause-resume animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/165-pause-resume-animation.gif)](https://github.com/user-attachments/assets/feb1a425-cb96-4009-a133-8bd29480f21f)
GIF Version ![pause-resume animation](https://github.com/humanlayer/12-factor-agents/blob/main/img/165-pause-resume-animation.gif)
It should be easy for users, apps, pipelines, and other agents to launch an agent with a simple API. Agents and their orchestrating deterministic code should be able to pause an agent when a long-running operation is needed. External triggers like webhooks should enable agents to resume from where they left off without deep integration with the agent orchestrator. Closely related to [factor 5 - unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) and [factor 8 - own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md), but can be implemented independently. **Note** - often AI orchestrators will allow for pause and resume, but not between the moment of tool selection and tool execution. See also [factor 7 - contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) and [factor 11 - trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md). [← Unify Execution State](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) | [Contact Humans With Tools →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) ================================================ FILE: content/factor-07-contact-humans-with-tools.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 7. Contact humans with tool calls By default, LLM APIs rely on a fundamental HIGH-STAKES token choice: Are we returning plaintext content, or are we returning structured data? 
![170-contact-humans-with-tools](https://github.com/humanlayer/12-factor-agents/blob/main/img/170-contact-humans-with-tools.png) You're putting a lot of weight on that choice of first token, which, in the `the weather in tokyo` case, is > "the" but in the `fetch_weather` case, it's some special token to denote the start of a JSON object. > |JSON> You might get better results by having the LLM *always* output JSON, and then declare its intent with some natural language tokens like `request_human_input` or `done_for_now` (as opposed to a "proper" tool like `check_weather_in_city`). Again, you might not get any performance boost from this, but you should experiment, and ensure you're free to try weird stuff to get the best results. ```python class Options: urgency: Literal["low", "medium", "high"] format: Literal["free_text", "yes_no", "multiple_choice"] choices: List[str] # Tool definition for human interaction class RequestHumanInput: intent: "request_human_input" question: str context: str options: Options # Example usage in the agent loop if nextStep.intent == 'request_human_input': thread.events.append({ type: 'human_input_requested', data: nextStep }) thread_id = await save_state(thread) await notify_human(nextStep, thread_id) return # Break loop and wait for response to come back with thread ID else: # ... other cases ``` Later, you might receive a webhook from a system that handles slack, email, sms, or other events. ```python @app.post('/webhook') def webhook(req: Request): thread_id = req.body.threadId thread = await load_state(thread_id) thread.events.push({ type: 'response_from_human', data: req.body }) # ...
simplified for brevity, you likely don't want to block the web worker here next_step = await determine_next_step(thread_to_prompt(thread)) thread.events.append(next_step) result = await handle_next_step(thread, next_step) # todo - loop or break or whatever you want return {"status": "ok"} ``` The above includes patterns from [factor 5 - unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md), [factor 8 - own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md), [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md), and [factor 4 - tools are just structured outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md), and several others. If we were using the XML-y format from [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md), our context window after a few turns might look like this: ```xml (snipped for brevity) From: @alex Channel: #deployments Text: Can you deploy backend v1.2.3 to production? Thread: [] intent: "request_human_input" question: "Would you like to proceed with deploying v1.2.3 to production?" context: "This is a production deployment that will affect live users." options: { urgency: "high" format: "yes_no" } response: "yes please proceed" approved: true timestamp: "2024-03-15T10:30:00Z" user: "alex@company.com" intent: "deploy_backend" tag: "v1.2.3" environment: "production" status: "success" message: "Deployment v1.2.3 to production completed successfully." timestamp: "2024-03-15T10:30:00Z" ``` Benefits: 1. **Clear Instructions**: Tools for different types of human contact allow for more specificity from the LLM 2.
**Inner vs Outer Loop**: Enables agent workflows **outside** of the traditional ChatGPT-style interface, where the control flow and context initialization may be `Agent->Human` rather than `Human->Agent` (think, agents kicked off by a cron or an event) 3. **Multiple Human Access**: Can easily track and coordinate input from different humans through structured events 4. **Multi-Agent**: Simple abstraction can be easily extended to support `Agent->Agent` requests and responses 5. **Durable**: Combined with [factor 6 - launch/pause/resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md), this makes for durable, reliable, and introspectable multiplayer workflows [More on Outer Loop Agents over here](https://theouterloop.substack.com/p/openais-realtime-api-is-a-step-towards) ![175-outer-loop-agents](https://github.com/humanlayer/12-factor-agents/blob/main/img/175-outer-loop-agents.png) Works great with [factor 11 - trigger from anywhere, meet users where they are](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) [← Launch/Pause/Resume](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) | [Own Your Control Flow →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) ================================================ FILE: content/factor-08-own-your-control-flow.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 8. Own your control flow If you own your control flow, you can do lots of fun things. ![180-control-flow](https://github.com/humanlayer/12-factor-agents/blob/main/img/180-control-flow.png) Build your own control structures that make sense for your specific use case.
Specifically, certain types of tool calls may be reason to break out of the loop and wait for a response from a human or another long-running task like a training pipeline. You may also want to incorporate custom implementations of: - summarization or caching of tool call results - LLM-as-judge on structured output - context window compaction or other [memory management](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) - logging, tracing, and metrics - client-side rate limiting - durable sleep / pause / "wait for event" The below example shows three possible control flow patterns: - request_clarification: model asked for more info, break the loop and wait for a response from a human - fetch_open_issues: model asked for a list of open issues, fetch the issues, append to context window, and pass straight back to the model - create_issue: model asked to create an issue, this is a high-stakes thing, so break the loop and wait for human approval ```python def handle_next_step(thread: Thread): while True: next_step = await determine_next_step(thread_to_prompt(thread)) # inlined for clarity - in reality you could put # this in a method, use exceptions for control flow, or whatever you want if next_step.intent == 'request_clarification': thread.events.append({ type: 'request_clarification', data: nextStep, }) await send_message_to_human(next_step) await db.save_thread(thread) # async step - break the loop, we'll get a webhook later break elif next_step.intent == 'fetch_open_issues': thread.events.append({ type: 'fetch_open_issues', data: next_step, }) issues = await linear_client.issues() thread.events.append({ type: 'fetch_open_issues_result', data: issues, }) # sync step - pass the new context to the LLM to determine the NEXT next step continue elif next_step.intent == 'create_issue': thread.events.append({ type: 'create_issue', data: next_step, }) await request_human_approval(next_step) await db.save_thread(thread) # async
step - break the loop, we'll get a webhook later break ``` This pattern allows you to interrupt and resume your agent's flow as needed, creating more natural conversations and workflows. **Example** - the number one feature request I have for every AI framework out there is we need to be able to interrupt a working agent and resume later, ESPECIALLY between the moment of tool **selection** and the moment of tool **invocation**. Without this level of resumability/granularity, there's no way to review/approve the tool call before it runs, which means you're forced to either: 1. Pause the task in memory while waiting for the long-running thing to complete (think `while...sleep`) and restart it from the beginning if the process is interrupted 2. Restrict the agent to only low-stakes, low-risk calls like research and summarization 3. Give the agent access to do bigger, more useful things, and just yolo hope it doesn't screw up You may notice this is closely related to [factor 5 - unify execution state and business state](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) and [factor 6 - launch/pause/resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md), but can be implemented independently. [← Contact Humans With Tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) | [Compact Errors →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md) ================================================ FILE: content/factor-09-compact-errors.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 9. Compact Errors into Context Window This one is a little short but is worth mentioning. 
One of the benefits of agents is "self-healing" - for short tasks, an LLM might call a tool that fails. Good LLMs have a fairly good chance of reading an error message or stack trace and figuring out what to change in a subsequent tool call. Most frameworks implement this, but you can do JUST THIS without doing any of the other 11 factors. Here's an example: ```python thread = {"events": [initial_message]} while True: next_step = await determine_next_step(thread_to_prompt(thread)) thread["events"].append({ "type": next_step.intent, "data": next_step, }) try: result = await handle_next_step(thread, next_step) # our switch statement except Exception as e: # if we get an error, we can add it to the context window and try again thread["events"].append({ "type": 'error', "data": format_error(e), }) # loop, or do whatever else here to try to recover ``` You may want to implement an errorCounter for a specific tool call, to limit to ~3 attempts of a single tool, or whatever other logic makes sense for your use case. ```python consecutive_errors = 0 while True: # ... existing code ... try: result = await handle_next_step(thread, next_step) thread["events"].append({ "type": next_step.intent + '_result', data: result, }) # success! reset the error counter consecutive_errors = 0 except Exception as e: consecutive_errors += 1 if consecutive_errors < 3: # do the loop and try again thread["events"].append({ "type": 'error', "data": format_error(e), }) else: # break the loop, reset parts of the context window, escalate to a human, or whatever else you want to do break } } ``` Hitting some consecutive-error-threshold might be a great place to [escalate to a human](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md), whether by model decision or via deterministic takeover of the control flow.
[![195-factor-09-errors](https://github.com/humanlayer/12-factor-agents/blob/main/img/195-factor-09-errors.gif)](https://github.com/user-attachments/assets/cd7ed814-8309-4baf-81a5-9502f91d4043)
[GIF Version](https://github.com/humanlayer/12-factor-agents/blob/main/img/195-factor-09-errors.gif) ![195-factor-09-errors](https://github.com/humanlayer/12-factor-agents/blob/main/img/195-factor-09-errors.gif)
Benefits: 1. **Self-Healing**: The LLM can read the error message and figure out what to change in a subsequent tool call 2. **Durable**: The agent can continue to run even if one tool call fails I'm sure you will find that if you do this TOO much, your agent will start to spin out and might repeat the same error over and over again. That's where [factor 8 - own your control flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) and [factor 3 - own your context building](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) come in - you don't need to just put the raw error back on, you can completely restructure how it's represented, remove previous events from the context window, or whatever deterministic thing you find works to get an agent back on track. But the number one way to prevent error spin-outs is to embrace [factor 10 - small, focused agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md). [← Own Your Control Flow](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-08-own-your-control-flow.md) | [Small Focused Agents →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md) ================================================ FILE: content/factor-1-natural-language-to-tool-calls.md ================================================ [Moved to factor-01-natural-language-to-tool-calls.md](./factor-01-natural-language-to-tool-calls.md) ================================================ FILE: content/factor-10-small-focused-agents.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 10. Small, Focused Agents Rather than building monolithic agents that try to do everything, build small, focused agents that do one thing well. 
Agents are just one building block in a larger, mostly deterministic system. ![1a0-small-focused-agents](https://github.com/humanlayer/12-factor-agents/blob/main/img/1a0-small-focused-agents.png) The key insight here is about LLM limitations: the bigger and more complex a task is, the more steps it will take, which means a longer context window. As context grows, LLMs are more likely to get lost or lose focus. By keeping agents focused on specific domains with 3-10, maybe 20 steps max, we keep context windows manageable and LLM performance high. > #### As context grows, LLMs are more likely to get lost or lose focus Benefits of small, focused agents: 1. **Manageable Context**: Smaller context windows mean better LLM performance 2. **Clear Responsibilities**: Each agent has a well-defined scope and purpose 3. **Better Reliability**: Less chance of getting lost in complex workflows 4. **Easier Testing**: Simpler to test and validate specific functionality 5. **Improved Debugging**: Easier to identify and fix issues when they occur ### What if LLMs get smarter? Do we still need this if LLMs get smart enough to handle 100-step+ workflows? tl;dr yes. As agents and LLMs improve, they **might** naturally expand to be able to handle longer context windows. This means handling MORE of a larger DAG. This small, focused approach ensures you can get results TODAY, while preparing you to slowly expand agent scope as LLM context windows become more reliable. (If you've refactored large deterministic code bases before, you may be nodding your head right now). [![gif](https://github.com/humanlayer/12-factor-agents/blob/main/img/1a5-agent-scope-grow.gif)](https://github.com/user-attachments/assets/0cd3f52c-046e-4d5e-bab4-57657157c82f )
GIF Version ![gif](https://github.com/humanlayer/12-factor-agents/blob/main/img/1a5-agent-scope-grow.gif)
Being intentional about size/scope of agents, and only growing in ways that allow you to maintain quality, is key here. As the [team that built NotebookLM put it](https://open.substack.com/pub/swyx/p/notebooklm?selection=08e1187c-cfee-4c63-93c9-71216640a5f8&utm_campaign=post-share-selection&utm_medium=web): > I feel like consistently, the most magical moments out of AI building come about for me when I'm really, really, really just close to the edge of the model capability Regardless of where that boundary is, if you can find that boundary and get it right consistently, you'll be building magical experiences. There are many moats to be built here, but as usual, they take some engineering rigor. [← Compact Errors](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-09-compact-errors.md) | [Trigger From Anywhere →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) ================================================ FILE: content/factor-11-trigger-from-anywhere.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 11. Trigger from anywhere, meet users where they are If you're waiting for the [humanlayer](https://humanlayer.dev) pitch, you made it. If you're doing [factor 6 - launch/pause/resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-06-launch-pause-resume.md) and [factor 7 - contact humans with tool calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md), you're ready to incorporate this factor. ![1b0-trigger-from-anywhere](https://github.com/humanlayer/12-factor-agents/blob/main/img/1b0-trigger-from-anywhere.png) Enable users to trigger agents from slack, email, sms, or whatever other channel they want. Enable agents to respond via the same channels. 
Benefits: - **Meet users where they are**: This helps you build AI applications that feel like real humans, or at the very least, digital coworkers - **Outer Loop Agents**: Enable agents to be triggered by non-humans, e.g. events, crons, outages, whatever else. They may work for 5, 20, 90 minutes, but when they get to a critical point, they can contact a human for help, feedback, or approval - **High Stakes Tools**: If you're able to quickly loop in a variety of humans, you can give agents access to higher stakes operations like sending external emails, updating production data and more. Maintaining clear standards gets you auditability and confidence in agents that [perform bigger better things](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md#what-if-llms-get-smarter) [← Small Focused Agents](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-10-small-focused-agents.md) | [Stateless Reducer →](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-12-stateless-reducer.md) ================================================ FILE: content/factor-12-stateless-reducer.md ================================================ [← Back to README](https://github.com/humanlayer/12-factor-agents/blob/main/README.md) ### 12. Make your agent a stateless reducer Okay so we're over 1000 lines of markdown at this point. This one is mostly just for fun. 
![1c0-stateless-reducer](https://github.com/humanlayer/12-factor-agents/blob/main/img/1c0-stateless-reducer.png) ![1c5-agent-foldl](https://github.com/humanlayer/12-factor-agents/blob/main/img/1c5-agent-foldl.png) [← Trigger From Anywhere](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-11-trigger-from-anywhere.md) | [Appendix - Pre-Fetch Context →](https://github.com/humanlayer/12-factor-agents/blob/main/content/appendix-13-pre-fetch.md) ================================================ FILE: content/factor-2-own-your-prompts.md ================================================ [Moved to factor-02-own-your-prompts.md](./factor-02-own-your-prompts.md) ================================================ FILE: content/factor-3-own-your-context-window.md ================================================ [Moved to factor-03-own-your-context-window.md](./factor-03-own-your-context-window.md) ================================================ FILE: content/factor-4-tools-are-structured-outputs.md ================================================ [Moved to factor-04-tools-are-structured-outputs.md](./factor-04-tools-are-structured-outputs.md) ================================================ FILE: content/factor-5-unify-execution-state.md ================================================ [Moved to factor-05-unify-execution-state.md](./factor-05-unify-execution-state.md) ================================================ FILE: content/factor-6-launch-pause-resume.md ================================================ [Moved to factor-06-launch-pause-resume.md](./factor-06-launch-pause-resume.md) ================================================ FILE: content/factor-7-contact-humans-with-tools.md ================================================ [Moved to factor-07-contact-humans-with-tools.md](./factor-07-contact-humans-with-tools.md) ================================================ FILE: content/factor-8-own-your-control-flow.md 
================================================ [Moved to factor-08-own-your-control-flow.md](./factor-08-own-your-control-flow.md) ================================================ FILE: content/factor-9-compact-errors.md ================================================ [Moved to factor-09-compact-errors.md](./factor-09-compact-errors.md) ================================================ FILE: drafts/a2h-spec.md ================================================ # A2H - The Agent-to-Human Protocol ## Overview A2H is a service that allows an agent to request human interaction ## Why another protocol? MCP and A2A are not enough ## Shoulds - Clients should respect A2H_BASE_URL and A2H_API_KEY environment variables if set, to allow for simple oauth2-based authentication to REST services. ## Core Protocol ### Scopes The A2H protocol supports two scopes: - The agent side, APIs consumed by an agent to request human interaction - The (Optional) admin side, APIs consumed by an admin or web application to manage humans and their contact channels This separation allows for agents to query and find humans to contact, without exposing the human's contact details to the agent. It is the responsibility of the A2H provider to relay agent requests to the appropriate human via that human's preferred contact channel(s). 
### Objects ``` apiVersion: proto.a2h.dev/v1alpha1 kind: Message metadata: uid: "123" spec: # spec sent by agent message: "" # message from the agent response_schema: # optional, json schema for the response, channel_id: status: # status resolved by a2h server humanMessage: "" # message from the human response: # optional, matches spec schema ``` ``` apiVersion: proto.a2h.dev/v1alpha1 kind: NewConversation metadata: uid: "abc" spec: # spec sent by a2h server message: "" # message from the agent channel_id: "123" # channel id to use for future conversations response_schema: # optional, json schema for the response, ``` #### HumanContact ```json { "run_id": "run_123", "call_id": "call_456", "spec": { "msg": "I've tried using the tool to refund the customer but it's returning a 500 error. Can you help?", "channel": { "slack": { "channel_or_user_id": "U1234567890", "context_about_channel_or_user": "Support team lead" } } } } ``` A HumanContact represents a request for human interaction. It contains: - `run_id` (string): Unique identifier for the run - `call_id` (string): Unique identifier for the contact request - `spec` (HumanContactSpec): The specification for the contact request - `status` (HumanContactStatus, optional): The current status of the contact request The HumanContactSpec contains: - `msg` (string): The message to send to the human - `subject` (string, optional): Subject of the contact request - `channel` (ContactChannel, optional): The channel to use for contact - `response_options` (ResponseOption[], optional): Available response options - `state` (object, optional): Additional state information The HumanContactStatus contains: - `requested_at` (datetime, optional): When the contact was requested - `responded_at` (datetime, optional): When the human responded - `response` (string, optional): The human's response - `response_option_name` (string, optional): Name of the selected response option - `slack_message_ts` (string, optional): Slack message
timestamp if applicable - `failed_validation_details` (object, optional): Details if validation failed #### FunctionCall Example: ```json { "run_id": "run_789", "call_id": "call_101", "spec": { "fn": "process_payment", "kwargs": { "amount": 100.00, "currency": "USD", "recipient": "merchant_123" }, "channel": { "email": { "address": "ap@example.com" } } }, "status": { "requested_at": "2024-03-20T11:00:00Z", "responded_at": "2024-03-20T11:02:00Z", "approved": true, "comment": "Payment looks good, approved", "user_info": { "name": "John Doe", "role": "Finance Manager" }, "slack_message_ts": "1234567890.123457" } } ``` A FunctionCall represents a request for human approval of a function execution. It contains: - `run_id` (string): Unique identifier for the run - `call_id` (string): Unique identifier for the function call - `spec` (FunctionCallSpec): The specification for the function call - `status` (FunctionCallStatus, optional): The current status of the function call The FunctionCallSpec contains: - `fn` (string): The function to be called - `kwargs` (object): The keyword arguments for the function - `channel` (ContactChannel, optional): The channel to use for contact - `reject_options` (ResponseOption[], optional): Available rejection options - `state` (object, optional): Additional state information The FunctionCallStatus contains: - `requested_at` (datetime, optional): When the approval was requested - `responded_at` (datetime, optional): When the human responded - `approved` (boolean, optional): Whether the function call was approved - `comment` (string, optional): Any comment from the human - `user_info` (object, optional): Information about the responding user - `slack_context` (object, optional): Slack-specific context - `reject_option_name` (string, optional): Name of the selected rejection option - `slack_message_ts` (string, optional): Slack message timestamp if applicable - `failed_validation_details` (object, optional): Details if validation failed
#### ContactChannel Example: ```json { "slack": { "channel_or_user_id": "U1234567890", "context_about_channel_or_user": "Support team lead", "allowed_responder_ids": ["U1234567890", "U2345678901"], "experimental_slack_blocks": true, "thread_ts": "1234567890.123456" } } ``` or ```json { "email": { "address": "ap@example.com", "context_about_user": "Accounts Payable", "in_reply_to_message_id": "1234567890", "references_message_id": "1234567890", "template": "..." } } ``` A ContactChannel represents a channel through which a human can be contacted. The protocol supports several channel types: 1. SlackContactChannel: - `channel_or_user_id` (string): The Slack channel or user ID - `context_about_channel_or_user` (string, optional): Additional context - `bot_token` (string, optional): Bot token for authentication - `allowed_responder_ids` (string[], optional): IDs of allowed responders - `experimental_slack_blocks` (boolean, optional): Enable experimental blocks - `thread_ts` (string, optional): Thread timestamp for threaded messages 2. SMSContactChannel: - `phone_number` (string): The phone number to contact - `context_about_user` (string, optional): Additional context about the user 3. WhatsAppContactChannel: - `phone_number` (string): The phone number to contact - `context_about_user` (string, optional): Additional context about the user #### Human (Agent Side) From the agent's perspective, a human is an object that has a name and description. 
#### Human (Admin Side) From the admin's perspective, a human is an object that has a name, description, and a list of prioritized contact channels, with details ### Agent Endpoints #### POST /human_contacts #### GET /human_contacts/:call_id #### POST /function_calls #### GET /function_calls/:call_id ## Extended Protocol - Admin Humans - Agent Humans Get - Agent Humans Search - Agent Channels List - Agent Channels validate ### Objects #### Human (Agent Side) From the agent's perspective, a human is an object that has a name and description. #### Human (Admin Side) From the admin's perspective, a human is an object that has a name, description, and a list of prioritized contact channels, with details ### Agent Endpoints #### GET /channels return what contact channels are available and their supported fields example response: ```json { "channels": { "slack": { "channelOrUserId": { "type": "string", "description": "The Slack channel or user ID to send messages to" }, "contextAboutChannelOrUser": { "type": "string", "description": "Additional context about the Slack channel or user" } }, "email": { "address": { "type": "string", "description": "Email address to send messages to" }, "contextAboutUser": { "type": "string", "description": "Additional context about the email recipient" }, "inReplyToMessageId": { "type": "string", "description": "The message ID of the email to reply to" }, "referencesMessageId": { "type": "string", "description": "The message ID of the email to reference" } } } } ``` #### GET /humans return a list of humans that are available to interact with example response: ```json { "humans": [ { "id": "654", "name": "Jane Doe", "description": "Jane Doe is a human who knows about technology and entrepreneurship" }, { "id": "123", "name": "John Doe", "description": "John Doe is a human who knows about sales and marketing" } ] } ``` #### GET /humans/search?q= search for humans by name or description example response: ```json { "humans": [ { "id": "654",
"name": "Jane Doe", "description": "Jane Doe is a human who knows about technology and entrepreneurship" } ] } ``` ### Administrative Endpoints #### POST /humans Enroll a new human for agent contact example request: ```json { "name": "John Doe", "description": "John Doe is a human who knows about sales and marketing", "prioritizedContactChannels": [ { "slack": { "channelOrUserId": "U1234567890" } }, { "email": { "address": "john.doe@example.com" } } ] } ``` #### GET /humans/:id Get a human by id example response: ```json ================================================ FILE: drafts/ah2-openapi.json ================================================ ================================================ FILE: hack/contributors_markdown/.python-version ================================================ 3.13 ================================================ FILE: hack/contributors_markdown/README.md ================================================ ================================================ FILE: hack/contributors_markdown/contributors_markdown.py ================================================ #!/usr/bin/env python3 """ Generate a Markdown grid of contributor avatars for a GitHub repository. Usage: python contributors_markdown.py --repo owner/name --token GH_TOKEN [--cols 7] [--image_size 80] [--output FILE] Arguments: --repo GitHub repository in "owner/name" form (e.g. "octocat/Hello-World") --token Personal access token with `public_repo` scope (or `repo` for private). Can also be provided via the GITHUB_TOKEN environment variable. --cols Number of avatars per row in the generated grid (default 7). --image_size Pixel width for avatars (GitHub automatically resizes; default 80). --output File to write the Markdown grid into (default: stdout, use '-' for stdout). The generated file contains a Markdown table‑less grid of linked avatars that can be embedded in README.md or any other Markdown document.
""" from __future__ import annotations import argparse import os import sys import textwrap from typing import List, Dict import requests API_URL_TEMPLATE = "https://api.github.com/repos/{owner}/{repo}/contributors" def fetch_contributors(owner: str, repo: str, token: str | None, per_page: int = 100) -> List[Dict]: """Return a list of contributor objects from the GitHub REST API.""" headers = {"Accept": "application/vnd.github+json"} if token: headers["Authorization"] = f"Bearer {token}" contributors: List[Dict] = [] page = 1 while True: url = f"{API_URL_TEMPLATE.format(owner=owner, repo=repo)}?per_page={per_page}&page={page}" response = requests.get(url, headers=headers, timeout=10) response.raise_for_status() batch = response.json() if not batch: break contributors.extend(batch) if len(batch) < per_page: break page += 1 return contributors def build_markdown(contributors: List[Dict], cols: int = 7, image_size: int = 80) -> str: """Return a Markdown fragment containing a grid of linked avatar images.""" lines: List[str] = [] row: List[str] = [] for contributor in contributors: login = contributor["login"] avatar = f"{contributor['avatar_url']}&s={image_size}" profile = contributor["html_url"] cell = f'[{login}]({profile})' row.append(cell) if len(row) == cols: lines.append(" ".join(row)) row = [] if row: lines.append(" ".join(row)) return "\n\n".join(lines) def main() -> None: parser = argparse.ArgumentParser(description="Generate a Markdown grid of contributor avatars") parser.add_argument("--repo", required=True, help="GitHub repo in owner/name form") parser.add_argument("--token", help="GitHub Personal Access Token (or set GITHUB_TOKEN env)") parser.add_argument("--cols", type=int, default=7, help="Number of avatars per row (default 7)") parser.add_argument("--image_size", type=int, default=80, help="Avatar size in px (default 80)") parser.add_argument("--output", "-o", default="-", help="Output file (default: stdout, use '-' for stdout)") args = 
parser.parse_args() token = args.token or os.getenv("GITHUB_TOKEN") if not token: parser.error("A GitHub token must be supplied via --token or GITHUB_TOKEN env var.") if "/" not in args.repo: parser.error("--repo must be in 'owner/name' form") owner, repo = args.repo.split("/", 1) contributors = fetch_contributors(owner, repo, token) if not contributors: sys.exit("No contributors found. Is the repository correct and does the token have access?") markdown = build_markdown(contributors, cols=args.cols, image_size=args.image_size) header = textwrap.dedent( f""" ## Contributors Thanks to these wonderful people:\n """ ) if args.output == "-": sys.stdout.write(header) sys.stdout.write(markdown) sys.stdout.write("\n") else: with open(args.output, "w", encoding="utf-8") as fh: fh.write(header) fh.write(markdown) fh.write("\n") print(f"Wrote {len(contributors)} contributors to {args.output}", file=sys.stderr) if __name__ == "__main__": main() ================================================ FILE: hack/contributors_markdown/pyproject.toml ================================================ [project] name = "contributors-markdown" version = "0.1.0" description = "Add your description here" readme = "README.md" requires-python = ">=3.13" dependencies = [ "requests>=2.32.3", ] ================================================ FILE: packages/create-12-factor-agent/template/.gitignore ================================================ baml_client/ node_modules/ .threads/ ================================================ FILE: packages/create-12-factor-agent/template/README.md ================================================ # Chapter 0 - Hello World Let's start with a basic TypeScript setup and a hello world program. This guide is written in TypeScript (yes, a python version is coming soon) There are many checkpoints between every file edit in the workshop steps, so even if you aren't super familiar with typescript, you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of nodejs and npm installed You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine brew install node@20 You should see the node version node --version Copy initial package.json cp ./walkthrough/00-package.json package.json Install dependencies npm install Copy tsconfig.json cp ./walkthrough/00-tsconfig.json tsconfig.json add .gitignore cp ./walkthrough/00-.gitignore .gitignore Create src folder mkdir -p src Add a simple hello world index.ts cp ./walkthrough/00-index.ts src/index.ts Run it to verify npx tsx src/index.ts You should see: hello, world! # Chapter 1 - CLI and Agent Loop Now let's add BAML and create our first agent with a CLI interface. First, we'll need to install [BAML](https://github.com/boundaryml/baml) which is a tool for prompting and structured outputs. npm install @boundaryml/baml Initialize BAML npx baml-cli init Remove default resume.baml rm baml_src/resume.baml Add our starter agent, a single baml prompt that we'll build on cp ./walkthrough/01-agent.baml baml_src/agent.baml Generate BAML client code npx baml-cli generate Enable BAML logging for this section export BAML_LOG=debug Add the CLI interface cp ./walkthrough/01-cli.ts src/cli.ts Update index.ts to use the CLI cp ./walkthrough/01-index.ts src/index.ts Add the agent implementation cp ./walkthrough/01-agent.ts src/agent.ts The BAML code is configured to use BASETEN_API_KEY by default To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co), and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/). ```rust function DetermineNextStep(thread: string) -> DoneForNow { client Qwen3 // ... ``` If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key. If you want to try swapping out the model, you can change the `client` line.
[Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms) For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider. For example, to use openai with an OPENAI_API_KEY, you can do: client "openai/gpt-4o" Set your env vars export BASETEN_API_KEY=... export BASETEN_BASE_URL=... Try it out npx tsx src/index.ts hello you should see a familiar response from the model { intent: 'done_for_now', message: 'Hello! How can I assist you today?' } # Chapter 2 - Add Calculator Tools Let's add some calculator tools to our agent. Let's start by adding a tool definition for the calculator These are simple structured outputs that we'll ask the model to return as a "next step" in the agentic loop. cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps cp ./walkthrough/02-agent.baml baml_src/agent.baml Generate updated BAML client npx baml-cli generate Try out the calculator npx tsx src/index.ts 'can you add 3 and 4' You should see a tool call to the calculator { intent: 'add', a: 3, b: 4 } # Chapter 3 - Process Tool Calls in a Loop Now let's add a real agentic loop that can run the tools and get a final answer from the LLM. First, let's update the agent to handle the tool call cp ./walkthrough/03-agent.ts src/agent.ts Now, let's try it out npx tsx src/index.ts 'can you add 3 and 4' you should see the agent call the tool and then return the result { intent: 'done_for_now', message: 'The sum of 3 and 4 is 7.'
} For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output export BAML_LOG=off Try a multi-step calculation npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result' you'll notice that tools like multiply and divide are not available npx tsx src/index.ts 'can you multiply 3 and 4' next, let's add handlers for the rest of the calculator tools cp ./walkthrough/03b-agent.ts src/agent.ts Test subtraction npx tsx src/index.ts 'can you subtract 3 from 4' now, let's test the multiplication tool npx tsx src/index.ts 'can you multiply 3 and 4' finally, let's test a more complex calculation with multiple operations npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' congratulations, you've taken your first step into hand-rolling an agent loop. from here, we're going to start incorporating some more intermediate and advanced concepts for 12-factor agents. # Chapter 4 - Add Tests to agent.baml Let's add some tests to our BAML agent. to start, leave the baml logs enabled export BAML_LOG=debug next, let's add some tests to the agent We'll start with a simple test that checks the agent's ability to handle a basic calculation. cp ./walkthrough/04-agent.baml baml_src/agent.baml Run the tests npx baml-cli test now, let's improve the test with assertions! Assertions are a great way to make sure the agent is working as expected, and can easily be extended to check for more complex behavior. cp ./walkthrough/04b-agent.baml baml_src/agent.baml Run the tests npx baml-cli test as you add more tests, you can disable the logs to keep the output clean. You may want to turn them on as you iterate on specific tests.
export BAML_LOG=off now, let's add some more complex test cases, where we resume from in the middle of an in-progress agentic context window cp ./walkthrough/04c-agent.baml baml_src/agent.baml let's try to run it npx baml-cli test # Chapter 5 - Multiple Human Tools In this section, we'll add support for multiple tools that serve to contact humans. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off first, let's add a tool that can request clarification from a human this will be different from the "done_for_now" tool, and can be used to more flexibly handle different types of human interactions in your agent. cp ./walkthrough/05-agent.baml baml_src/agent.baml next, let's re-generate the client code NOTE - if you're using the VSCode extension for BAML, the client will be regenerated automatically when you save the file in your editor. npx baml-cli generate now, let's update the agent to use the new tool cp ./walkthrough/05-agent.ts src/agent.ts next, let's update the CLI to handle clarification requests by requesting input from the user on the CLI cp ./walkthrough/05-cli.ts src/cli.ts let's try it out npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& ' next, let's add a test that checks the agent's ability to handle a clarification request cp ./walkthrough/05b-agent.baml baml_src/agent.baml and now we can run the tests again npx baml-cli test you'll notice the new test passes, but the hello world test fails This is because the agent's default behavior is to return "done_for_now" cp ./walkthrough/05c-agent.baml baml_src/agent.baml Verify tests pass npx baml-cli test # Chapter 6 - Customize Your Prompt with Reasoning In this section, we'll explore how to customize the prompt of the agent with reasoning steps. 
this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md) there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts) for this section, it will be helpful to leave the baml logs enabled export BAML_LOG=debug update the agent prompt to include a reasoning step cp ./walkthrough/06-agent.baml baml_src/agent.baml generate the updated client npx baml-cli generate now, you can try it out with a simple prompt npx tsx src/index.ts 'can you multiply 3 and 4' you should see output from the baml logs showing the reasoning steps #### optional challenge add a field to your tool output format that includes the reasoning steps in the output! # Chapter 7 - Customize Your Context Window In this section, we'll explore how to customize the context window of the agent. this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md) update the agent to pretty-print the Context window for the model cp ./walkthrough/07-agent.ts src/agent.ts Test the formatting BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' next, let's update the agent to use XML formatting instead this is a very popular format for passing data to a model, among other things, because of the token efficiency of XML. cp ./walkthrough/07b-agent.ts src/agent.ts let's try it out BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' lets update our tests to match the new output format cp ./walkthrough/07c-agent.baml baml_src/agent.baml check out the updated tests npx baml-cli test # Chapter 8 - Adding API Endpoints Add an Express server to expose the agent via HTTP. 
for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Install Express and types npm install express && npm install --save-dev @types/express supertest Add the server implementation cp ./walkthrough/08-server.ts src/server.ts Start the server npx tsx src/server.ts Test with curl (in another terminal) curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you add 3 and 4"}' You should get an answer from the agent which includes the agentic trace, ending in a message like: {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."} # Chapter 9 - In-Memory State and Async Clarification Add state management and async clarification support. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Add some simple in-memory state management for threads cp ./walkthrough/09-state.ts src/state.ts update the server to use the state management * Add thread state management using `ThreadStore` * return thread IDs and response URLs from the /thread endpoint * implement GET /thread/:id * implement POST /thread/:id/response cp ./walkthrough/09-server.ts src/server.ts Start the server npx tsx src/server.ts Test clarification flow curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you multiply 3 and xyz"}' # Chapter 10 - Adding Human Approval Add support for human approval of operations. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. 
export BAML_LOG=off update the server to handle human approvals * Import `handleNextStep` to execute approved actions * Add two payload types to distinguish approvals from responses * Handle responses and approvals differently in the endpoint * Show better error messages when things go wrong cp ./walkthrough/10-server.ts src/server.ts Add a few methods to the agent to handle approvals and responses cp ./walkthrough/10-agent.ts src/agent.ts Start the server npx tsx src/server.ts Test division with approval curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you divide 3 by 4"}' You should see: { "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b", "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } } ] } reject the request with another curl call, changing the thread ID curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}' You should see: the last tool call is now `"intent":"divide","a":3,"b":5` { "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } }, { "type": "tool_response", "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\"" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 5, "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response" } } ] } now you can approve the operation curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": true}' you should see the final message includes 
the tool response and final result! ... { "type": "tool_response", "data": 0.5 }, { "type": "done_for_now", "message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!", "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response" } # Chapter 11 - Human Approvals over email in this section, we'll add support for human approvals over email. This will start a little bit contrived, just to get the concepts down - We'll start by invoking the workflow from the CLI but approvals for `divide` and `request_more_information` will be handled over email, then the final `done_for_now` answer will be printed back to the CLI While contrived, this is a great example of the flexibility you get from [factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md) for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Install HumanLayer npm install humanlayer Update CLI to send `divide` and `request_more_information` to a human via email cp ./walkthrough/11-cli.ts src/cli.ts Run the CLI npx tsx src/index.ts 'can you divide 4 by 5' The last line of your program should mention human review step nextStep { intent: 'divide', a: 4, b: 5 } HumanLayer: Requested human approval from HumanLayer cloud go ahead and respond to the email with some feedback: ![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true) you should get another email with an updated attempt based on your feedback! You can go ahead and approve this one: ![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true) and your final output will look like nextStep { intent: 'done_for_now', message: 'The division of 4 by 5 is 0.8. 
If you have any other calculations or questions, feel free to ask!' } The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask! let's implement the `request_more_information` flow as well cp ./walkthrough/11b-cli.ts src/cli.ts let's test the `request_more_information` flow by asking for a calculation with garbled input: npx tsx src/index.ts 'can you multiply 4 and xyz' You should get an email with a request for clarification Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else? you can respond with something like use 8 instead of xyz you should see a final result on the CLI like I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32. as a final step, let's explore using a custom html template for the email cp ./walkthrough/11c-cli.ts src/cli.ts first try with divide: npx tsx src/index.ts 'can you divide 4 by 5' you should see a slightly different email with the custom template ![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true) feel free to run with the flow and then you can try updating the template to your liking (if you're using cursor, something as simple as highlighting the template and asking to "make it better" should do the trick) try triggering "request_more_information" as well! that's it - in the next chapter, we'll build a fully email-driven workflow agent that uses webhooks for human approval # Chapter XX - HumanLayer Webhook Integration the previous sections used the humanlayer SDK in "synchronous mode" - that means every time we wait for human approval, we sit in a loop polling until the human response is received. 
That's obviously not ideal, especially for production workloads, so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md) by updating the server to end processing after contacting a human, and use webhooks to receive the results. add code to initialize humanlayer in the server cp ./walkthrough/12-1-server-init.ts src/server.ts next, lets update the /thread endpoint to 1. handle requests asynchronously, returning immediately 2. create a human contact on request_more_information and done_for_now calls Update the server to be able to handle request_clarification responses - remove the old /response endpoint and types - update the /thread endpoint to run processing asynchronously, return immediately - send a state.threadId when requesting human responses - add a handleHumanResponse function to process the human response - add a /webhook endpoint to handle the webhook response cp ./walkthrough/12a-server.ts src/server.ts Start the server in another terminal npx tsx src/server.ts now that the server is running, send a payload to the '/thread' endpoint __ do the response step __ now handle approvals for divide __ now also handle done_for_now ================================================ FILE: packages/create-12-factor-agent/template/baml_src/agent.baml ================================================ class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. 
"#) } class RequestApprovalFromManager { intent "request_approval_from_manager" message string } class ProcessRefund { intent "process_refund" @description("you can process a refund for a customer, always request approval from the manager before processing a refund") order_id string amount int | float reason string } type HumanTools = ClarificationRequest | DoneForNow | RequestApprovalFromManager type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool type CustomerSupportTools = ProcessRefund function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools | CustomerSupportTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and fe1iiaff10 "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and FD*(#F&& ? message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? 
lets try 12 instead "# }
  @@assert(intent, {{this.intent == "multiply"}})
  // Each assertion label names the field it checks, so a failure report
  // points at the right operand (the labels were previously swapped).
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}

================================================
FILE: packages/create-12-factor-agent/template/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

// Plain OpenAI gpt-4o client, no retry policy.
client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

// Cheaper OpenAI client; retries with the Exponential policy defined below.
client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

// Anthropic Sonnet client, no retry policy.
client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}

// Anthropic Haiku client; retries with the Constant policy defined below.
client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try the clients in order until one succeeds.
    // The second entry must be a different client: listing CustomGPT4oMini
    // twice only repeats the same call instead of falling back.
    strategy [CustomGPT4oMini, CustomGPT4o]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/retry
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    delay_ms 200
  }
}

retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}

================================================
FILE: packages/create-12-factor-agent/template/baml_src/generators.baml
================================================
// This helps use auto generate libraries you can use in the language of
// your choice. You can have multiple generators if you use multiple languages.
// Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.88.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). default_client_mode async } ================================================ FILE: packages/create-12-factor-agent/template/baml_src/tool_calculator.baml ================================================ class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: packages/create-12-factor-agent/template/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "@boundaryml/baml": "latest", "express": "^5.1.0", "humanlayer": "^0.7.7", "tsx": "^4.15.0", "typescript": "^5.0.0", "zod": "^3.25.64" }, "devDependencies": { "@types/express": "^5.0.1", "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0", "supertest": "^7.1.0" } } ================================================ FILE: packages/create-12-factor-agent/template/src/a2h.ts ================================================ import { z, ZodSchema } from 'zod'; // Types for A2H API objects matching the new schemas // Common metadata type export type Metadata = { uid: string; }; // Message sent 
by agent to a2h server type MessageSpec> = { agentMessage: string; // message from the agent response_schema?: T; // optional Zod schema for the response channel_id?: string; // optional channel id }; export type Message = ZodSchema> = { apiVersion: "proto.a2h.dev/v1alpha1"; kind: "Message"; metadata: Metadata; spec: MessageSpec; status?: { humanMessage?: string; // message from the human response?: T extends ZodSchema ? z.infer : any; // optional, matches spec schema }; }; export const ApprovalSchema = z.object({ approved: z.boolean(), comment: z.string().optional(), }); export type ApprovalRequest = Message; export type HumanRequest = Message; // NewConversation sent by a2h server to agent type NewConversationSpec = { user_message: string; // message from the human channel_id: string; // channel id to use for future conversations agent_name?: string; // optional agent name or identifier raw?: Record; // optional raw data from the request, e.g. email metadata }; export type NewConversation = { apiVersion: "proto.a2h.dev/v1alpha1"; kind: "NewConversation"; metadata: Metadata; spec: NewConversationSpec; }; // Optionally, you can add union types for future extensibility export type A2HEvent = ZodSchema> = Message | NewConversation; ================================================ FILE: packages/create-12-factor-agent/template/src/agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? 
e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) }

    /**
     * Whether the most recent event is a message addressed to the human
     * (`request_more_information` or `done_for_now`).
     *
     * Returns false, rather than throwing, when the thread has no events or
     * the last event carries no object payload; this matches the
     * `e.data?.intent` access used when serializing events.
     */
    awaitingHumanResponse(): boolean {
        const intent = this.lastEvent()?.data?.intent;
        return intent === 'request_more_information' || intent === 'done_for_now';
    }

    /**
     * Whether the most recent event is a `divide` tool call, the one
     * operation that is returned to the caller for approval instead of
     * being executed inside the agent loop.
     */
    awaitingHumanApproval(): boolean {
        return this.lastEvent()?.data?.intent === 'divide';
    }

    /** The most recent event; `undefined` at runtime if the thread is empty. */
    lastEvent(): Event {
        return this.events[this.events.length - 1];
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes one calculator tool call and appends its result to the thread as
 * a `tool_response` event.
 *
 * @param nextStep the tool call to execute (`add`, `subtract`, `multiply` or `divide`)
 * @param thread   the thread to append to; it is mutated and returned
 * @returns the same thread instance with the result event appended
 * @throws Error when `nextStep.intent` is not a calculator tool. Callers pass
 *         untyped event payloads here, so the check is made at runtime
 *         instead of silently resolving to `undefined`.
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // Plain JS division: a zero divisor yields Infinity or NaN, not an exception.
            result = nextStep.a / nextStep.b;
            break;
        default:
            throw new Error(`unknown calculator tool: ${(nextStep as { intent?: unknown }).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Inner agent loop: asks the model for the next step, records it as a
 * `tool_call` event, and either executes it (add / subtract / multiply) or
 * returns the thread so the caller can involve a human.
 *
 * @param thread the conversation so far; it is mutated as events are appended
 * @returns the thread, whose last event is the step that needs outside handling
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
            case "request_approval_from_manager":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
                break;
            default:
                // Any other intent the model can emit (e.g. "process_refund")
                // has no executor in this loop. Hand it to the caller rather
                // than calling the model again with nothing new in the thread.
                return thread;
        }
    }
}
================================================ FILE: packages/create-12-factor-agent/template/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event, handleNextStep } from "../src/agent"; import { FileSystemThreadStore } from "./state"; import chalk from "chalk"; const threadStore = new FileSystemThreadStore(); export async function cliOuterLoop(message: string) { // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); const threadId = await threadStore.create(thread); // Run the agent loop with the thread // loop until ctrl+c // optional, you could exit on done_for_now and print the final result // while (lastEvent.data.intent !== "done_for_now") { while (true) { let newThread = await agentLoop(thread); await threadStore.update(threadId, newThread); let lastEvent = newThread.lastEvent(); // everything on CLI const responseEvent = await askHumanCLI(lastEvent); newThread.events.push(responseEvent); // if (lastEvent.data.intent === "request_approval_from_manager") { // const responseEvent = await askManager(lastEvent); // thread.events.push(responseEvent); // } else { // const responseEvent = await askHumanCLI(lastEvent); // thread.events.push(responseEvent); // } await threadStore.update(threadId, newThread); } } export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); const message = args.length === 0 ? "hello!" 
: args.join(" "); await cliOuterLoop(message); } // async function askManager(lastEvent: Event): Promise { // const hl = humanlayer({ // contactChannel: { // email: { // address: process.env.HUMANLAYER_EMAIL_ADDRESS || "manager@example.com" // } // } // }) // const resp = await hl.fetchHumanResponse({ // spec: { // msg: lastEvent.data.message // } // }) // return { // type: "manager_response", // data: resp // } // } async function askHumanCLI(lastEvent: Event): Promise { switch (lastEvent.data.intent) { case "divide": const response = await approveCLI(`agent wants to run ${chalk.green(JSON.stringify(lastEvent.data))}\nPress Enter to approve, or type feedback to cancel:`); if (response.approved) { const thread = new Thread([lastEvent]); const result = await handleNextStep(lastEvent.data, thread); return result.events[result.events.length - 1]; } else { return { type: "tool_response", data: `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}` }; } case "request_more_information": case "request_approval_from_manager": case "done_for_now": const message = await messageCLI(lastEvent.data.message); return { type: "tool_response", data: message }; default: throw new Error(`unknown tool in outer loop: ${lastEvent.data.intent}`) } } type Approval = { approved: true; } | { approved: false; comment: string; } async function messageCLI(message: string): Promise { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { readline.close(); resolve(answer); }); }); } async function approveCLI(message: string): Promise { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { readline.close(); // If the answer is empty (just pressed enter), treat it as approval if 
(answer.trim() === '') { resolve({ approved: true }); } else { // Any non-empty response is treated as rejection with feedback resolve({ approved: false, comment: answer }); } }); }); } if (require.main === module) { cli() } ================================================ FILE: packages/create-12-factor-agent/template/src/index.ts ================================================ import { cli } from "./cli" async function main() { await cli() } main().catch(console.error) ================================================ FILE: packages/create-12-factor-agent/template/src/server.ts ================================================ import express, { Request, Response } from 'express'; import { Thread, agentLoop as innerLoop, handleNextStep } from '../src/agent'; import { FileSystemThreadStore, ThreadStore } from '../src/state'; import { ContactChannel, FunctionCall, HumanContact, humanlayer, V1Beta2EmailEventReceived, V1Beta2HumanContactCompleted, V1Beta2SlackEventReceived } from '@humanlayer/sdk'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new FileSystemThreadStore(); type V1Beta3ConversationCreated = { is_test: boolean; type: "conversation.created"; event: { user_message: string; contact_channel_id: number; agent_name: string; } } type CompletedHumanContact = HumanContact & { status: { response: string; } } type V1Veta3HumanContactCompleted = { is_test: boolean; type: "human_contact.completed"; event: { contact_channel_id: number; } & CompletedHumanContact } type Approved = {status: {approved: true}} type Rejected = {status: {approved: false; comment: string}} type CompletedFunctionCall = FunctionCall & (Approved | Rejected) type V1Beta3FunctionCallCompleted = { is_test: boolean; type: "function_call.completed"; event: { contact_channel_id: number; } & CompletedFunctionCall } type V1Beta3Event = V1Beta3ConversationCreated | V1Veta3HumanContactCompleted | V1Beta3FunctionCallCompleted; const notFound = (res: Response) => { 
res.status(404).json({ error: 'Not Found', message: `Thread not found`, status: 404 }); }

/**
 * Webhook handler for POST /api/v1/conversations.
 *
 * Accepts a HumanLayer event that either starts a conversation or delivers a
 * human's answer to an earlier contact / function-call request. It loads or
 * creates the matching thread, folds the human's answer into it, runs the
 * inner agent loop, persists the result, and creates the next human contact
 * or function-call approval request.
 *
 * Every store and HumanLayer call is awaited: the thread store is
 * asynchronous, so an un-awaited `get` yields a Promise (always truthy, never
 * a Thread) and an un-awaited `create` yields a Promise instead of the id
 * that has to be sent as `state.thread_id`.
 */
const outerLoop = async (req: Request, res: Response) => {
    console.log("outerLoop", req.body);
    const body = req.body as V1Beta3Event;

    const hl = humanlayer({
        runId: process.env.HUMANLAYER_RUN_ID || `12fa-agent`,
        contactChannel: {
            channel_id: body.event.contact_channel_id,
        } as ContactChannel // todo export this type flavor
    });

    /* get the thread or make a new one */
    let thread: Thread | undefined;
    let threadId: string | undefined;

    switch (body.type) {
        case "conversation.created":
            thread = new Thread([{type: "conversation.created", data: body.event.user_message}]);
            break;
        case "human_contact.completed":
        case "function_call.completed":
            threadId = body.event.spec.state?.thread_id;
            if (!threadId) {
                notFound(res);
                return;
            }
            thread = await store.get(threadId);
            if (!thread) {
                notFound(res);
                return;
            }
            break;
    }

    // The payload is only cast, not validated, so an unrecognised `type`
    // leaves `thread` unset; reject it instead of crashing further down.
    if (!thread) {
        res.status(400).json({
            error: 'Bad Request',
            message: `Unsupported event type`,
            status: 400
        });
        return;
    }

    /* handle the response event */
    if (body.type === "function_call.completed" && body.event.status?.approved) {
        // run the function call and add the result to the thread
        thread = await handleNextStep(thread.lastEvent().data, thread);
    } else if (body.type === "function_call.completed" && !body.event.status?.approved) {
        // add the denial to the thread
        thread.events.push({
            type: "human_response",
            data: `user denied operation ${thread.lastEvent().data.intent} with feedback: ${body.event.status?.comment}`
        });
    } else if (body.type === "human_contact.completed") {
        // add the human response to the thread
        thread.events.push({
            type: "human_response",
            data: {
                msg: body.event.status.response,
            }
        });
    }

    /* run the inner loop */
    const newThread = await innerLoop(thread);
    if (threadId) {
        await store.update(threadId, newThread);
    } else {
        threadId = await store.create(newThread);
    }

    // we exited the inner loop, send to human
    // NOTE(review): the agent loop also returns on "request_approval_from_manager",
    // which is not dispatched below — confirm whether it needs its own channel.
    const lastEvent = newThread.lastEvent();
    switch (lastEvent.data.intent) {
        case "request_more_information":
        case "done_for_now":
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId
                    }
                }
            });
            console.log(`created human contact "${lastEvent.data.message}"`);
            break;
        case "other_scary_tools": // example, add more tools here
        case "divide": {
            const intent = lastEvent.data.intent;
            // remove intent from kwargs payload
            const { intent: _, ...kwargs } = lastEvent.data;
            await hl.createFunctionCall({
                spec: {
                    fn: intent,
                    kwargs: kwargs,
                    state: {
                        thread_id: threadId
                    }
                }
            });
            console.log("created function call", {intent, kwargs});
            break;
        }
    }

    res.json({ status: "ok" });
}

/**
 * Registers the webhook route and the catch-all 404 handler, then starts
 * listening on `PORT` (default 8000).
 *
 * @returns the server object returned by `app.listen`
 */
export const startServer = () => {
    app.post('/api/v1/conversations', outerLoop)

    // Handle 404 - Not Found
    app.use((req: Request, res: Response) => {
        res.status(404).json({
            error: 'Not Found',
            message: `Route ${req.originalUrl} not found`,
            status: 404
        });
    });

    const port = process.env.PORT || 8000;
    const server = app.listen(port, () => {
        console.log(`Server is running on port ${port}`);
    });

    server.on('error', (error: Error) => {
        console.error('Server error:', error);
    });

    return server;
}

// Only start the server if this file is being run directly
if (require.main === module) {
    startServer();
}

================================================
FILE: packages/create-12-factor-agent/template/src/state.ts
================================================
import crypto from 'crypto';
import { Thread } from '../src/agent';
import { Response } from 'express';
import fs from 'fs/promises';
import path from 'path';

/** Persistence contract for threads; every operation is asynchronous. */
export interface ThreadStore {
    /** Stores a new thread and resolves to its generated id. */
    create(thread: Thread): Promise<string>;
    /** Resolves to the stored thread, or undefined when none exists for `id`. */
    get(id: string): Promise<Thread | undefined>;
    /** Overwrites the stored thread for `id`. */
    update(id: string, thread: Thread): Promise<void>;
}

// you can replace this with any simple state management, // e.g. 
redis, sqlite, postgres, etc

/**
 * ThreadStore backed by the local filesystem.
 *
 * Each thread is written to `<cwd>/.threads/<id>.json` (the serialized Thread)
 * and `<cwd>/.threads/<id>.txt` (the LLM-facing serialization).
 *
 * Ids are interpolated into file paths and may come from an external webhook
 * payload, so only ids made of letters, digits, `_` and `-` are accepted.
 * The UUIDs produced by `create` always qualify.
 */
export class FileSystemThreadStore implements ThreadStore {
    private threadsDir: string;

    constructor() {
        this.threadsDir = path.join(process.cwd(), '.threads');
    }

    /** True when `id` cannot escape the threads directory once used as a file name. */
    private isSafeId(id: string): boolean {
        return typeof id === 'string' && /^[A-Za-z0-9_-]+$/.test(id);
    }

    /** Writes both representations of `thread` under `id`, creating the directory if needed. */
    private async write(id: string, thread: Thread): Promise<void> {
        await fs.mkdir(this.threadsDir, { recursive: true });
        const filePath = path.join(this.threadsDir, `${id}.json`);
        const txtPath = path.join(this.threadsDir, `${id}.txt`);
        await Promise.all([
            fs.writeFile(filePath, JSON.stringify(thread, null, 2)),
            fs.writeFile(txtPath, thread.serializeForLLM())
        ]);
    }

    /**
     * Persists a new thread under a freshly generated UUID.
     *
     * @returns the id to use for later `get` / `update` calls
     */
    async create(thread: Thread): Promise<string> {
        const id = crypto.randomUUID();
        await this.write(id, thread);
        return id;
    }

    /**
     * Loads a thread by id.
     *
     * @returns the thread, or undefined when the id is malformed or its file
     *          cannot be read (for example, it does not exist)
     */
    async get(id: string): Promise<Thread | undefined> {
        // A malformed id cannot name a stored thread; treat it as "not found"
        // rather than letting `../` segments reach the filesystem.
        if (!this.isSafeId(id)) return undefined;
        const filePath = path.join(this.threadsDir, `${id}.json`);
        const data = await fs.readFile(filePath, 'utf8').catch(() => null);
        if (!data) return undefined;
        return new Thread(JSON.parse(data).events);
    }

    /**
     * Overwrites the stored thread for `id`.
     *
     * @throws Error when `id` contains characters that are not allowed in a thread id
     */
    async update(id: string, thread: Thread): Promise<void> {
        if (!this.isSafeId(id)) {
            throw new Error(`invalid thread id: ${JSON.stringify(id)}`);
        }
        await this.write(id, thread);
    }
}

================================================
FILE: packages/create-12-factor-agent/template/tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "ES2017",
    "lib": ["esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "bundler",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "plugins": [],
    "paths": {
      "@/*": ["./*"]
    }
  },
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
  "exclude": ["node_modules", "walkthrough"]
}

================================================
FILE: packages/walkthroughgen/.gitignore
================================================
.tmptest*

================================================
FILE: packages/walkthroughgen/examples/typescript/.gitignore
================================================ build/ ================================================ FILE: packages/walkthroughgen/examples/typescript/walkthrough/00-package-lock.json ================================================ { "name": "walkthroughgen", "version": "1.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "walkthroughgen", "version": "1.0.0", "license": "ISC", "dependencies": { "typescript": "^5.8.3" } } }, "node_modules/typescript": { "version": "5.8.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" }, "engines": { "node": ">=14.17" } } } ================================================ FILE: packages/walkthroughgen/examples/typescript/walkthrough/00-package.json ================================================ { "name": "walkthroughgen", "version": "1.0.0", "main": "index.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" }, "keywords": [], "author": "", "license": "ISC", "description": "", "dependencies": { "typescript": "^5.8.3" } } ================================================ FILE: packages/walkthroughgen/examples/typescript/walkthrough/00-tsconfig.json ================================================ { "compilerOptions": { "target": "es2016", "module": "commonjs", "esModuleInterop": true, "forceConsistentCasingInFileNames": true, "strict": true, "skipLibCheck": true }, "exclude": ["node_modules", "dist", "**/*walkthrough/**"] } ================================================ FILE: packages/walkthroughgen/examples/typescript/walkthrough/01-index.ts ================================================ const main = () => { console.log("hello world"); }; main(); ================================================ FILE: 
packages/walkthroughgen/examples/typescript/walkthrough/02-cli.ts ================================================ const cli = () => { const args = process.argv.slice(2); const command = args[0]; const name = args[1]; if (command === "create") { console.log(`Creating ${name}`); } else { console.log("Invalid command: ", command); console.log("available commands: create"); } }; cli(); ================================================ FILE: packages/walkthroughgen/examples/typescript/walkthrough/02-index.ts ================================================ const main = async () => { return cli(); }; main().catch(console.error); ================================================ FILE: packages/walkthroughgen/examples/typescript/walkthrough.yaml ================================================ title: "setting up a typescript cli" text: "this is a walkthrough for setting up a typescript cli" targets: - markdown: "./build/walkthrough.md" # generates a walkthrough.md file onChange: # default behavior - on changes, show diffs and cp commands diff: true cp: true newFiles: # when new files are created, just show the copy command cat: false cp: true - final: "./build/final" # outputs the final project to the final folder - folders: "./build/by-section" # creates a separate working folder for each section sections: - name: setup title: "Copy initial files" steps: - file: {src: ./walkthrough/00-package.json, dest: package.json} - file: {src: ./walkthrough/00-package-lock.json, dest: package-lock.json} - file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json} - name: initialize title: "Initialize the project" steps: - text: "initialize the project" command: | npm install - text: "then add index.ts" file: {src: ./walkthrough/01-index.ts, dest: src/index.ts} - text: "run it with tsx" command: | npx tsx src/index.ts results: - text: "you should see a hello world message" code: | hello world - name: add-cli title: "Add a CLI" steps: - text: "add a cli" file: {src: 
./walkthrough/02-cli.ts, dest: src/cli.ts} - text: "update index.ts to use the cli" file: {src: ./walkthrough/02-index.ts, dest: src/index.ts} ================================================ FILE: packages/walkthroughgen/examples/walkthroughgen/walkthrough.yaml ================================================ title: "using walkthroughgen" targets: - markdown: "./walkthrough.md" # generates a walkthrough.md file diffs: true - final: "./final" # outputs the final project to the final folder - folders: "./by-section" # creates a separate working folder for each section init: - file: {src: ./walkthrough/00-package.json, dest: package.json} - file: {src: ./walkthrough/00-package-lock.json, dest: package-lock.json} sections: - name: initialize title: "initialize the project" steps: - text: "initialize walkthroughgen" command: | npx wtg init my-project cd my-project - text: "this will create an empty project with a walkthrough.yaml file" command: | ls -la cat walkthrough.yaml results: - text: "you should see a walkthrough.yaml file" code: | # walkthrough.yaml title: "hello world" sections: - name: initialize title: "initialize the project" steps: - text: "initialize the project" command: | # your code here - name: build title: "build the project" steps: - text: "build the project" command: | npx wtg build - text: "this will create a walkthrough.md file" command: | cat walkthrough.md results: ================================================ FILE: packages/walkthroughgen/jest.config.js ================================================ module.exports = { preset: 'ts-jest', testEnvironment: 'node', testMatch: ['**/test/**/*.ts'], testPathIgnorePatterns: ['/node_modules/', '/test/utils/'], transform: { '^.+\\.ts$': 'ts-jest', }, }; ================================================ FILE: packages/walkthroughgen/package.json ================================================ { "name": "walkthroughgen", "version": "1.0.0", "main": "index.js", "scripts": { "test": "jest", 
"test:watch": "jest --watch" }, "keywords": [], "author": "", "license": "ISC", "description": "", "dependencies": { "@boundaryml/baml": "^0.85.0", "@types/diff": "^7.0.2", "@types/js-yaml": "^4.0.9", "diff": "^7.0.0", "js-yaml": "^4.1.0", "typescript": "^5.8.3" }, "devDependencies": { "@types/jest": "^29.5.14", "jest": "^29.7.0", "ts-jest": "^29.3.2" } } ================================================ FILE: packages/walkthroughgen/prompt.md ================================================ Walkthroughgen is a tool for creating walkthroughs, tutorials, readmes, and documentation. ## Usage You create a walkthrough by writing a simple yaml file that describes the walkthrough. In the file, you reference the incremental files that should exist at each step of the walkthrough ``` ├── walkthrough │   ├── 00-package-lock.json │   ├── 00-package.json │   ├── 01-index.ts │   ├── 02-cli.ts │   └── 02-index.ts └── walkthrough.yaml ``` Your walkthrough.yaml file might look like this (runnable example in [examples/typescript-cli](./examples/typescript)) ```yaml title: "setting up a typescript cli" text: "this is a walkthrough for setting up a typescript cli" targets: - markdown: "./build/walkthrough.md" # generates a walkthrough.md file onChange: # default behavior - on changes, show diffs and cp commands diff: true cp: true newFiles: # when new files are created, just show the copy command cat: false cp: true - final: "./build/final" # outputs the final project to the final folder - folders: "./build/by-section" # creates a separate working folder for each section sections: - name: setup title: "Copy initial files" steps: - file: {src: ./walkthrough/00-package.json, dest: package.json} - file: {src: ./walkthrough/00-package-lock.json, dest: package-lock.json} - file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json} - name: initialize title: "Initialize the project" steps: - text: "initialize the project" command: | npm install - text: "then add index.ts" file: {src: 
./walkthrough/01-index.ts, dest: src/index.ts} - text: "run it with tsx" command: | npx tsx src/index.ts results: - text: "you should see a hello world message" code: | hello world - name: add-cli title: "Add a CLI" steps: - text: "add a cli" file: {src: ./walkthrough/02-cli.ts, dest: src/cli.ts} - text: "add a cli" file: {src: ./walkthrough/02-index.ts, dest: src/index.ts} ``` Build the project with: ``` npm i -g wtg wtg build ``` based on your targets, this would create the following files ``` ├── walkthrough │   ├── 00-package-lock.json │   ├── 00-package.json │   ├── 01-index.ts │   ├── 02-cli.ts │   └── 02-index.ts ├── build │ ├── by-section │ │ ├── 00-initialize # only contains the files in `init` │ │ │ ├── readme.md # contains steps for this section │ │ │ ├── package.json │ │ │ ├── package-lock.json │ │ │ └── tsconfig.json │ │ └── 01-add-cli # contains the files up to the START of section 1 │ │ ├── readme.md # contains steps for this section │ │ ├── package.json │ │ ├── package-lock.json │ │ ├── tsconfig.json │ │ └── src │ │ └── index.ts │ ├── final │ │ ├── package.json │ │ ├── package-lock.json │ │ ├── tsconfig.json │ │ └── src │ │ ├── cli.ts │ │ └── index.ts │ └── walkthrough.md and your walkthrough.md file will look like: ```markdown # Setting up a typescript cli this is a walkthrough for setting up a typescript cli ## Copy initial files cp walkthrough/00-package.json package.json cp walkthrough/00-package-lock.json package-lock.json cp walkthrough/00-tsconfig.json tsconfig.json ## Initialize the project initialize the project npm install then add index.ts cp walkthrough/01-index.ts src/index.ts and run it with tsx npx tsx src/index.ts you should see a hello world message hello world ## Add a CLI add a cli ``` ``` cp walkthrough/02-cli.ts src/cli.ts update index.ts to use the cli ```diff const main = async () => { + return cli(); }; main(); ``` or just: cp walkthrough/02-index.ts src/index.ts ``` ## Features ### Targets - `file`: generates a single 
markdown file - `folder`: creates a set of folders, one for each section - `final`: outputs the final project to the current directory ### Init ### Sections ### Steps #### Step ## Walkthrough.yaml for walkthroughgen ## Implementation Plan - [ ] implement core walkthroughgen CLI - `wtg build` # defaults to walkthrough.yaml in current directory - Scope 1: generating walkthrough.md - [ ] create end-to-end test for a simple walkthrough file, just a single yaml file with no sections - [ ] create end-to-end test for a walkthrough file with a single section - [ ] test generation of diffs and cp commands - Scope 2: generating final/ project build - [ ] create end-to-end test for a walkthrough file with a final target - Scope 3: generating by-section project builds with readmes - [ ] create end-to-end test for a walkthrough file with a by-section target ================================================ FILE: packages/walkthroughgen/readme.md ================================================ # Walkthroughgen Walkthroughgen is a tool for creating walkthroughs, tutorials, readmes, and documentation. It helps you maintain step-by-step guides by generating markdown and working directories from a simple YAML configuration. ## Features - 📝 **Markdown Generation**: Create beautiful markdown files with diffs, code blocks, and collapsible sections - 📁 **Working Directories**: Generate separate directories for each section of your walkthrough - 🔄 **Incremental Changes**: Track and display changes between steps - 🎯 **Multiple Targets**: Output to markdown, section folders, and final project state - 📦 **File Management**: Copy files, create directories, and run commands - 🔍 **Rich Diffs**: Show meaningful diffs between file versions - 📚 **Section READMEs**: Generate per-section documentation ## Installation ```bash npm install -g walkthroughgen ``` ## Quick Start 1. 
Create a `walkthrough.yaml` file: ```yaml title: "My Tutorial" text: "A step-by-step guide" targets: - markdown: "./walkthrough.md" onChange: diff: true cp: true - folders: path: "./by-section" final: dirName: "final" sections: - name: setup title: "Initial Setup" steps: - file: {src: ./files/package.json, dest: package.json} - command: "npm install" ``` 2. Run the generator: ```bash walkthroughgen generate walkthrough.yaml ``` ## Directory Structure A typical walkthrough project looks like this: ``` my-tutorial/ ├── walkthrough/ # Source files for each step │ ├── 00-package.json │ ├── 01-index.ts │ └── 02-config.ts ├── walkthrough.yaml # Walkthrough configuration └── build/ # Generated output ├── by-section/ # Section-by-section working directories │ ├── 00-setup/ │ └── 01-config/ ├── final/ # Final project state └── walkthrough.md # Generated markdown ``` ## Walkthrough.yaml Configuration ### Top-Level Fields - `title`: Title of the walkthrough - `text`: Introduction text - `targets`: Output configuration - `sections`: Tutorial sections ### Targets #### Markdown Target ```yaml targets: - markdown: "./output.md" onChange: diff: true # Show diffs for changed files cp: true # Show cp commands newFiles: cat: false # Don't show file contents cp: true # Show cp commands ``` #### Folders Target ```yaml targets: - folders: path: "./by-section" # Base path for section folders skip: ["cleanup"] # Sections to skip final: dirName: "final" # Name for final state directory ``` ### Sections Each section represents a logical step in your tutorial: ```yaml sections: - name: setup # Used for folder naming and skip array title: "Initial Setup" # Display title text: "Setup steps..." # Section description steps: # ... steps ... 
``` ### Steps Steps define the actions to take: #### File Copy ```yaml steps: - text: "Copy package.json" file: src: ./files/package.json dest: package.json ``` #### Directory Creation ```yaml steps: - text: "Create src directory" dir: create: true path: src ``` #### Command Execution ```yaml steps: - text: "Install dependencies" command: "npm install" incremental: true # run when building up folders target ``` #### Command Results ```yaml steps: - command: "npm run test" results: - text: "You should see:" code: | All tests passed! ``` ## Generated Output ### Markdown Features - **File Diffs**: Shows changes between versions - **Copy Commands**: Easy-to-follow file copy instructions - **Collapsible Sections**: Hide/show file contents - **Code Highlighting**: Syntax highlighting for various languages Example markdown output: ~~~markdown # Initial Setup Copy the package.json: cp ./files/package.json package.json
<details>
<summary>show file</summary>

```json
{
  "name": "my-project",
  "version": "1.0.0"
}
```

</details>
Install dependencies: npm install You should see: added 123 packages ~~~ ### Section Folders The `folders` target creates: 1. A directory for each section 2. Section-specific README.md files 3. Working project state 4. Optional final state directory ## Examples See the [examples](./examples) directory for complete examples: - [TypeScript CLI](./examples/typescript): Basic TypeScript project setup - [Walkthroughgen](./examples/walkthroughgen): Self-documenting example ## Tips 1. Use meaningful section names - they become folder names 2. Include context in step text 3. Use `incremental: true` for commands that modify state 4. Leverage diffs to highlight important changes 5. Use the `skip` array to exclude setup/cleanup sections from output ## Contributing Contributions welcome! Please read [CONTRIBUTING.md](./CONTRIBUTING.md) for details. ## License This project is licensed under the MIT License - see the [LICENSE](./LICENSE) file for details. ================================================ FILE: packages/walkthroughgen/src/cli.ts ================================================ import * as fs from 'fs'; import * as path from 'path'; import * as yaml from 'js-yaml'; import * as Diff from 'diff'; import { execSync } from 'child_process'; interface Section { title: string; text?: string; name?: string; // Optional, used for folder naming steps?: Array<{ text?: string; // Make text optional file?: { src: string; dest: string }; command?: string; incremental?: boolean; // New field: if true, command only runs for folders target dir?: { create: boolean; path: string }; // Added dir step type results?: Array<{ text: string; code: string }>; }>; } interface WalkthroughData { title: string; text: string; sections?: Section[]; targets?: Array<{ markdown?: string; folders?: { path: string; // Path for section folders, e.g. 
"./build/by-section"
      skip?: string[]; // Section names to skip folder creation for
      final?: {
        dirName: string; // Name of the final directory containing all steps' results
      };
    };
    onChange?: { diff?: boolean; cp?: boolean };
    newFiles?: { cat?: boolean; cp?: boolean };
  }>;
}

/**
 * Returns the base name used when naming a section's folder: the explicit
 * `name` when it is set (non-empty), otherwise the lowercased title with every
 * run of characters outside [a-z0-9] replaced by a single hyphen.
 */
function getSectionBaseName(section: Section): string {
  if (section.name) {
    return section.name;
  }
  const slug = section.title.toLowerCase().replace(/[^a-z0-9]+/g, '-');
  return slug;
}

/**
 * Copies a walkthrough source file into a section folder, keeping the file's
 * path relative to the project root. Missing parent directories are created.
 */
function copySourceFiles(srcFile: string, projectRoot: string, sectionDir: string): void {
  const absoluteSource = path.resolve(projectRoot, srcFile);
  const target = path.join(sectionDir, path.relative(projectRoot, absoluteSource));
  fs.mkdirSync(path.dirname(target), { recursive: true });
  fs.copyFileSync(absoluteSource, target);
}

/**
 * Copies one file to another location; both paths are joined onto
 * `sectionDir`. Missing parent directories of the destination are created.
 */
function copyWorkingFile(srcFile: string, destFile: string, sectionDir: string): void {
  const from = path.join(sectionDir, srcFile);
  const to = path.join(sectionDir, destFile);
  fs.mkdirSync(path.dirname(to), { recursive: true });
  fs.copyFileSync(from, to);
}

/**
 * Recursively copies the contents of `src` into `dest`, creating `dest` and
 * any nested directories along the way. Does nothing when `src` does not exist.
 */
function copyDirectory(src: string, dest: string): void {
  if (!fs.existsSync(src)) {
    return;
  }
  fs.mkdirSync(dest, { recursive: true });
  fs.readdirSync(src, { withFileTypes: true }).forEach((child) => {
    const from = path.join(src, child.name);
    const to = path.join(dest, child.name);
    if (child.isDirectory()) {
      copyDirectory(from, to);
      return;
    }
    fs.copyFileSync(from, to);
  });
}

function applyStepsToWorkingDir(
  steps: Section['steps'],
  projectRoot: string,
  workingDir: string,
  sectionPath: string | null = null // If provided, also copy source files to section's walkthrough/
): void {
  if (!steps) return;

  for (const step of steps) {
    // Handle dir creation
    if (step.dir?.create) {
      const dirToCreate = path.join(workingDir, step.dir.path);
      fs.mkdirSync(dirToCreate, { recursive: true });
    }

    // Handle file copy
    if (step.file?.src) {
      // Copy to working directory
      const srcAbsPath = path.resolve(projectRoot,
step.file.src);
      const destPath = path.join(workingDir, step.file.dest);
      fs.mkdirSync(path.dirname(destPath), { recursive: true });
      fs.copyFileSync(srcAbsPath, destPath);

      // If a section path is provided, also copy source file to section's walkthrough/
      if (sectionPath) {
        copySourceFiles(step.file.src, projectRoot, sectionPath);
      }
    }

    // Handle command execution - only run if incremental is explicitly true
    if (step.command && step.incremental === true) {
      try {
        execSync(step.command, { cwd: workingDir, stdio: 'inherit' });
      } catch (error) {
        console.error(`Error executing incremental command "${step.command}" in ${workingDir}:`, error);
        // Log error but continue, matching behavior of file copy errors
      }
    }
  }
}

/**
 * Renders a plain markdown fragment for one section (no diffs, no file
 * contents): a `# title` heading, the optional section text, and for each step
 * its text, `mkdir -p` / `cp` lines, the command, and any result text and code.
 *
 * @param section Section to render.
 * @returns The markdown for the section.
 */
function generateSectionMarkdown(section: Section): string {
  // Trim the block and prefix EVERY line with the snippet indent. Prefixing
  // only the first line would leave the rest of a multi-line command
  // (YAML `|` block) unindented, unlike generateRichSectionMarkdown.
  const indentLines = (block: string): string =>
    block.trim().split('\n').map(line => ` ${line}`).join('\n');

  let markdown = `# ${section.title}\n\n`;
  if (section.text) {
    markdown += `${section.text}\n\n`;
  }

  if (section.steps) {
    for (const step of section.steps) {
      if (step.text) {
        markdown += `${step.text}\n\n`;
      }
      if (step.dir?.create) {
        markdown += ` mkdir -p ${step.dir.path}\n\n`;
      }
      if (step.file) {
        markdown += ` cp ${step.file.src} ${step.file.dest}\n\n`;
      }
      if (step.command) {
        // Single-line commands render exactly as before.
        markdown += `${indentLines(step.command)}\n\n`;
      }
      if (step.results) {
        for (const result of step.results) {
          markdown += `${result.text}\n\n`;
          if (result.code) {
            markdown += indentLines(result.code) + '\n\n';
          }
        }
      }
    }
  }

  return markdown;
}

function formatMinimalDiff(filePath: string, oldContent: string, newContent: string): string | null {
  // Normalize line endings in both inputs
  const normalize = (str: string) => str.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
  const normalizedOld = normalize(oldContent);
  const normalizedNew = normalize(newContent);

  if (normalizedOld === normalizedNew) {
    return null;
  }

  // Using context: 2 to show some surrounding lines
  const patch = Diff.createPatch(filePath, normalizedOld, normalizedNew, '', '', { context: 2 });
  const patchLines = patch.split('\n');
  const
effectiveChangeLines: string[] = []; let i = 0; while (i < patchLines.length) { const line = patchLines[i]; // Skip standard patch headers and hunk metadata if (line.startsWith('---') || line.startsWith('+++') || line.startsWith('@@')) { i++; continue; } // Check for identical remove/add pairs (which means no effective change for these two lines) if (line.startsWith('-')) { let nextDiffLineIndex = i + 1; // Skip empty lines AND "No newline" markers AND context lines to find the next actual diff line while (nextDiffLineIndex < patchLines.length && (patchLines[nextDiffLineIndex].trim() === '' || patchLines[nextDiffLineIndex].startsWith('\\') || patchLines[nextDiffLineIndex].startsWith(' '))) { nextDiffLineIndex++; } if (nextDiffLineIndex < patchLines.length && patchLines[nextDiffLineIndex].startsWith('+')) { const removedText = line.substring(1).trim(); const addedText = patchLines[nextDiffLineIndex].substring(1).trim(); if (removedText === addedText) { // Advance i past the current line, any skipped empty lines, and the matched added line i = nextDiffLineIndex + 1; continue; } } } // If the line starts with +, -, or space (context), it's a line to be included if (line.startsWith('+') || line.startsWith('-') || line.startsWith(' ')) { effectiveChangeLines.push(line); } i++; } if (effectiveChangeLines.length > 0) { return `\`\`\`diff\n${filePath}\n${effectiveChangeLines.join('\n')}\n\`\`\`\n\n`; } return null; } function generateRichSectionMarkdown( section: Section, projectRoot: string, sectionWorkingDir: string, walkthroughTargets: WalkthroughData['targets'] ): string { let markdown = `# ${section.title}\n\n`; if (section.text) { markdown += `${section.text}\n\n`; } // Initialize section's virtual file state from the actual files in sectionWorkingDir const sectionVirtualFileState = new Map(); if (fs.existsSync(sectionWorkingDir)) { const readFilesRecursively = (dir: string) => { const entries = fs.readdirSync(dir, { withFileTypes: true }); for (const entry of 
entries) { const fullPath = path.join(dir, entry.name); const relativePath = path.relative(sectionWorkingDir, fullPath); if (entry.isDirectory()) { readFilesRecursively(fullPath); } else { try { const content = fs.readFileSync(fullPath, 'utf8'); sectionVirtualFileState.set(relativePath, content); } catch (error) { console.warn(`Warning: Could not read file ${fullPath} for section README state`); } } } }; readFilesRecursively(sectionWorkingDir); } if (section.steps) { for (const step of section.steps) { if (step.text) { markdown += `${step.text}\n\n`; } if (step.dir?.create) { markdown += ` mkdir -p ${step.dir.path}\n\n`; } if (step.file) { const srcAbsolutePath = path.resolve(projectRoot, step.file.src); const destRelativePath = path.normalize(step.file.dest); let newContent: string; try { newContent = fs.readFileSync(srcAbsolutePath, 'utf8'); } catch (error: any) { console.warn(`Warning: Could not read source file ${srcAbsolutePath} for step: ${step.text || 'Unnamed step'}`); continue; } const isExistingVirtualFile = sectionVirtualFileState.has(destRelativePath); const oldContent = isExistingVirtualFile ? sectionVirtualFileState.get(destRelativePath)! : ''; if (isExistingVirtualFile) { // File is being changed/overwritten const shouldDiff = walkthroughTargets?.[0]?.onChange?.diff === true; let diffPrintedThisStep = false; if (shouldDiff && oldContent !== newContent) { const diffOutput = formatMinimalDiff(destRelativePath, oldContent, newContent); if (diffOutput) { markdown += diffOutput; diffPrintedThisStep = true; } } const showCp = walkthroughTargets?.[0]?.onChange?.cp !== false; if (showCp) { const cpCommand = `cp ${step.file.src} ${step.file.dest}`; if (diffPrintedThisStep) { markdown += `
<details>\n<summary>skip this step</summary>\n\n`; markdown += ` ${cpCommand}\n\n`; markdown += `</details>
\n\n`; } else { markdown += ` ${cpCommand}\n\n`; // Add "show file" details block let lang = path.extname(step.file.src).substring(1); if (lang === 'baml') { lang = 'rust'; } markdown += `
<details>\n<summary>show file</summary>\n\n`; markdown += `\`\`\`${lang}\n`; markdown += `// ${step.file.src}\n`; markdown += `${newContent.trim()}\n`; markdown += `\`\`\`\n\n`; markdown += `</details>
\n\n`; } } } else { // New file const showCpForNew = walkthroughTargets?.[0]?.newFiles?.cp !== false; if (showCpForNew) { const cpCommand = `cp ${step.file.src} ${step.file.dest}`; markdown += ` ${cpCommand}\n\n`; // Add "show file" details block let lang = path.extname(step.file.src).substring(1); if (lang === 'baml') { lang = 'rust'; } markdown += `
<details>\n<summary>show file</summary>\n\n`; markdown += `\`\`\`${lang}\n`; markdown += `// ${step.file.src}\n`; markdown += `${newContent.trim()}\n`; markdown += `\`\`\`\n\n`; markdown += `</details>
\n\n`; } } sectionVirtualFileState.set(destRelativePath, newContent); } if (step.command) { markdown += step.command.trim().split('\n').map(line => ` ${line}`).join('\n') + '\n\n'; } if (step.results) { for (const result of step.results) { markdown += `${result.text}\n\n`; if (result.code) { markdown += result.code.trim().split('\n').map(line => ` ${line}`).join('\n') + '\n\n'; } } } } } return markdown; } export const cli = (argv: string[]) => { if (argv.includes("--help") || argv.includes("-h")) { console.log(` USAGE: walkthroughgen generate [options] OPTIONS: --help, -h Show help generate Generate markdown from YAML file `); return; } if (argv[0] === "generate") { if (!argv[1]) { console.error("Error: YAML file path is required for 'generate' command."); console.log("Usage: walkthroughgen generate "); return; } const yamlPath = argv[1]; let yamlContent; try { yamlContent = fs.readFileSync(yamlPath, 'utf8'); } catch (error: any) { console.error(`Error: Could not read YAML file at '${yamlPath}'.`); console.error(error.message); return; } let data: WalkthroughData; try { data = yaml.load(yamlContent) as WalkthroughData; } catch (error: any) { console.error(`Error: Could not parse YAML content from '${yamlPath}'.`); console.error(error.message); return; } if (!data || typeof data.title !== 'string' || typeof data.text !== 'string') { console.error(`Error: Invalid YAML structure in '${yamlPath}'. 
Missing required 'title' or 'text' fields.`); return; } // Track virtual file state for diff generation const projectRoot = path.dirname(yamlPath); const virtualFileState = new Map(); // Process folders target first if (data.targets) { for (const target of data.targets) { // Ensure target.folders is an object with a path property if (target.folders && typeof target.folders === 'object' && target.folders.path) { const currentFoldersTarget = target.folders; // Assign to a new const for type narrowing const foldersBasePath = path.join(path.dirname(yamlPath), currentFoldersTarget.path); console.log('Creating folders base path:', foldersBasePath); fs.mkdirSync(foldersBasePath, { recursive: true }); // Create a temporary working directory to build up state const workingDirName = `.tmp-working-${Date.now()}`; const workingDir = path.join(foldersBasePath, workingDirName); console.log('Creating working directory:', workingDir); fs.mkdirSync(workingDir, { recursive: true }); try { // Create section folders and build up working state if (data.sections) { let visibleSectionIndex = 0; // Counter for non-skipped sections data.sections.forEach((section, originalIndex) => { const baseName = getSectionBaseName(section); // For logging, use original index to be clear about which section from YAML it is const logSectionIdentifier = `${String(originalIndex).padStart(2, '0')}-${baseName}`; console.log('Processing section:', logSectionIdentifier, 'with name:', section.name); const shouldSkip = currentFoldersTarget.skip?.includes(section.name || ''); let sectionPathForApplySteps: string | null = null; if (!shouldSkip) { // Use visibleSectionIndex for the actual folder name const sectionFolderName = `${String(visibleSectionIndex).padStart(2, '0')}-${baseName}`; const sectionPath = path.join(foldersBasePath, sectionFolderName); console.log('Creating section folder:', sectionPath); fs.mkdirSync(sectionPath, { recursive: true }); // Copy current working state to section folder if 
(fs.existsSync(workingDir) && fs.readdirSync(workingDir).length > 0) { copyDirectory(workingDir, sectionPath); } // Generate and write section README const sectionMarkdown = generateRichSectionMarkdown(section, projectRoot, sectionPath, data.targets); fs.writeFileSync(path.join(sectionPath, 'README.md'), sectionMarkdown); sectionPathForApplySteps = sectionPath; visibleSectionIndex++; // Increment only for sections that get a folder } // Apply steps to working directory applyStepsToWorkingDir(section.steps, projectRoot, workingDir, sectionPathForApplySteps); }); // Create final directory if specified if (currentFoldersTarget.final?.dirName) { const finalDirPath = path.join(foldersBasePath, currentFoldersTarget.final.dirName); fs.mkdirSync(finalDirPath, { recursive: true }); copyDirectory(workingDir, finalDirPath); // Optional: Generate cumulative README for final directory const finalReadme = data.sections .filter(s => !currentFoldersTarget.skip?.includes(s.name || '')) .map(s => generateSectionMarkdown(s)) .join('\n'); fs.writeFileSync(path.join(finalDirPath, 'README.md'), finalReadme); } } } finally { // Clean up working directory if (fs.existsSync(workingDir)) { fs.rmSync(workingDir, { recursive: true, force: true }); } } } } } let markdown = `# ${data.title}\n\n${data.text}\n\n`; if (data.sections) { for (const section of data.sections) { markdown += `## ${section.title}\n\n`; if (section.text) { markdown += `${section.text}\n\n`; } if (section.steps) { for (const step of section.steps) { if (step.text) { // Only add step.text if it exists markdown += `${step.text}\n\n`; } if (step.file) { const srcAbsolutePath = path.resolve(projectRoot, step.file.src); const destRelativePath = path.normalize(step.file.dest); let newContent: string; try { newContent = fs.readFileSync(srcAbsolutePath, 'utf8'); } catch (error: any) { console.warn(`Warning: Could not read source file ${srcAbsolutePath} for step: ${step.text || 'Unnamed step'}`); continue; } const 
isExistingVirtualFile = virtualFileState.has(destRelativePath); const oldContent = isExistingVirtualFile ? virtualFileState.get(destRelativePath)! : ''; if (isExistingVirtualFile) { // File is being changed/overwritten const shouldDiff = data.targets?.[0]?.onChange?.diff === true; let diffPrintedThisStep = false; if (shouldDiff && oldContent !== newContent) { const diffOutput = formatMinimalDiff(destRelativePath, oldContent, newContent); if (diffOutput) { markdown += diffOutput; diffPrintedThisStep = true; } } const showCp = data.targets?.[0]?.onChange?.cp !== false; if (showCp) { const cpCommand = `cp ${step.file.src} ${step.file.dest}`; if (diffPrintedThisStep) { markdown += `
<details>\n<summary>skip this step</summary>\n\n`; markdown += ` ${cpCommand}\n\n`; markdown += `</details>
\n\n`; } else { markdown += ` ${cpCommand}\n\n`; // Add "show file" details block let lang = path.extname(step.file.src).substring(1); if (lang === 'baml') { lang = 'rust'; } markdown += `
<details>\n<summary>show file</summary>\n\n`; markdown += `\`\`\`${lang}\n`; markdown += `// ${step.file.src}\n`; markdown += `${newContent.trim()}\n`; markdown += `\`\`\`\n\n`; markdown += `</details>
\n\n`; } } } else { // New file const showCpForNew = data.targets?.[0]?.newFiles?.cp !== false; if (showCpForNew) { const cpCommand = `cp ${step.file.src} ${step.file.dest}`; markdown += ` ${cpCommand}\n\n`; // Add "show file" details block let lang = path.extname(step.file.src).substring(1); if (lang === 'baml') { lang = 'rust'; } markdown += `
<details>\n<summary>show file</summary>\n\n`; markdown += `\`\`\`${lang}\n`; markdown += `// ${step.file.src}\n`; markdown += `${newContent.trim()}\n`; markdown += `\`\`\`\n\n`; markdown += `</details>
\n\n`; } } virtualFileState.set(destRelativePath, newContent); } if (step.command) { // Always show commands in markdown let commandLine = ` ${step.command.trim()}`; markdown += commandLine; markdown += "\n\n"; } if (step.results) { for (const result of step.results) { markdown += `${result.text}\n\n`; if (result.code) { markdown += result.code.trim().split('\n').map(line => ` ${line}`).join('\n') + '\n\n'; } } } } } } } const outputPath = data.targets?.[0]?.markdown ? path.join(path.dirname(yamlPath), data.targets[0].markdown) : path.join(path.dirname(yamlPath), 'walkthrough.md'); try { fs.mkdirSync(path.dirname(outputPath), { recursive: true }); fs.writeFileSync(outputPath, markdown); console.log(`Successfully generated walkthrough to ${outputPath}`); } catch (error: any) { console.error(`Error: Could not write markdown file to '${outputPath}'.`); console.error(error.message); return; } return; } console.log("Unknown command. Available commands: generate. Use --help for more info."); }; ================================================ FILE: packages/walkthroughgen/src/index.ts ================================================ import { cli } from "./cli"; const main = async () => { cli(process.argv.slice(2)); }; main().catch(console.error); ================================================ FILE: packages/walkthroughgen/test/e2e/test-e2e.ts ================================================ import * as fs from 'fs'; import * as path from 'path'; import { cli } from "../../src/cli"; import { withMockedConsole } from "../utils/console-mock"; import { withTmpDir } from "../utils/temp-dir"; describe("CLI basics", () => { it("should handle --help flag", () => { const output = withMockedConsole(() => { cli(["--help"]); }); expect(output).toContain("USAGE:"); expect(output).toContain("OPTIONS:"); expect(output).toContain("--help, -h"); }); it("should handle -h flag", () => { const output = withMockedConsole(() => { cli(["-h"]); }); expect(output).toContain("USAGE:"); 
expect(output).toContain("OPTIONS:"); expect(output).toContain("--help, -h"); }); it("should show error for missing yaml file path", () => { const output = withMockedConsole(() => { cli(["generate"]); }); expect(output).toContain("Error: YAML file path is required"); }); it("should show error for non-existent yaml file", () => { const output = withMockedConsole(() => { cli(["generate", "non-existent.yaml"]); }); expect(output).toContain("Error: Could not read YAML file"); }); it("should show error for invalid yaml content", () => { withTmpDir((tempDir: string) => { fs.writeFileSync( path.join(tempDir, 'invalid.yaml'), `invalid: yaml: content: [}` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "invalid.yaml")]); }); expect(output).toContain("Error: Could not parse YAML content"); }); }); it("should show error for missing required fields", () => { withTmpDir((tempDir: string) => { fs.writeFileSync( path.join(tempDir, 'missing-fields.yaml'), `some_field: "some value"` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "missing-fields.yaml")]); }); expect(output).toContain("Error: Invalid YAML structure"); expect(output).toContain("Missing required 'title' or 'text' fields"); }); }); it("should show unknown command message", () => { const output = withMockedConsole(() => { cli(["unknown"]); }); expect(output).toContain("Unknown command"); expect(output).toContain("Available commands: generate"); }); }); describe("CLI generate basic markdown", () => { it("should generate basic markdown", () => { withTmpDir((tempDir: string) => { fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "setting up a typescript cli" text: "this is a walkthrough for setting up a typescript cli"` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "walkthrough.yaml")]); }); expect(fs.existsSync(path.join(tempDir, 'walkthrough.md'))).toBe(true); const content = 
fs.readFileSync(path.join(tempDir, 'walkthrough.md'), 'utf8'); expect(content).toContain("# setting up a typescript cli"); expect(content).toContain("this is a walkthrough for setting up a typescript cli"); expect(output).toContain("Successfully generated walkthrough"); }); }); it("should generate markdown with a section", () => { withTmpDir((tempDir: string) => { fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "setting up a typescript cli" text: "this is a walkthrough for setting up a typescript cli" sections: - title: "Installation" text: "First, let's install the necessary dependencies"` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "walkthrough.yaml")]); }); expect(fs.existsSync(path.join(tempDir, 'walkthrough.md'))).toBe(true); const content = fs.readFileSync(path.join(tempDir, 'walkthrough.md'), 'utf8'); expect(content).toContain("# setting up a typescript cli"); expect(content).toContain("this is a walkthrough for setting up a typescript cli"); expect(content).toContain("## Installation"); expect(content).toContain("First, let's install the necessary dependencies"); expect(output).toContain("Successfully generated walkthrough"); }); }); it("should generate markdown with sections and steps", () => { withTmpDir((tempDir: string) => { fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true }); fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "setting up a typescript cli" text: "this is a walkthrough for setting up a typescript cli" targets: - markdown: "./build/walkthrough.md" onChange: diff: true cp: true sections: - name: setup title: "Initial Setup" steps: - text: "Create package.json" file: {src: ./walkthrough/00-package.json, dest: package.json} - text: "Install dependencies" command: | npm install results: - text: "You should see packages being installed" code: | added 123 packages` ); fs.writeFileSync( path.join(tempDir, 'walkthrough/00-package.json'), `{ "name": 
"walkthroughgen", "version": "1.0.0", "description": "A CLI tool for generating walkthroughs", "dependencies": { "typescript": "^5.0.0" } }` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "walkthrough.yaml")]); }); expect(fs.existsSync(path.join(tempDir, 'build/walkthrough.md'))).toBe(true); const content = fs.readFileSync(path.join(tempDir, 'build/walkthrough.md'), 'utf8').replace(/\r\n/g, '\n'); expect(content).toContain(` # setting up a typescript cli this is a walkthrough for setting up a typescript cli ## Initial Setup Create package.json cp ./walkthrough/00-package.json package.json
show file \`\`\`json // ./walkthrough/00-package.json { "name": "walkthroughgen", "version": "1.0.0", "description": "A CLI tool for generating walkthroughs", "dependencies": { "typescript": "^5.0.0" } } \`\`\`
Install dependencies npm install You should see packages being installed added 123 packages `.trim()); expect(output).toContain("Successfully generated walkthrough"); }); }); }); describe("CLI generate from example", () => { it("should generate markdown from the typescript example", () => { withTmpDir((tempDir: string) => { const exampleBasePath = path.resolve(__dirname, '../../examples/typescript'); const exampleWalkthroughDir = path.join(exampleBasePath, 'walkthrough'); // Copy walkthrough.yaml const sourceYamlPath = path.join(exampleBasePath, 'walkthrough.yaml'); const destYamlPath = path.join(tempDir, 'walkthrough.yaml'); fs.copyFileSync(sourceYamlPath, destYamlPath); // Copy walkthrough directory recursively const destWalkthroughSubDir = path.join(tempDir, 'walkthrough'); fs.cpSync(exampleWalkthroughDir, destWalkthroughSubDir, { recursive: true }); // Run CLI const output = withMockedConsole(() => { cli(["generate", destYamlPath]); }); // Assertions const expectedMarkdownPath = path.join(tempDir, 'build/walkthrough.md'); expect(fs.existsSync(expectedMarkdownPath)).toBe(true); expect(output).toContain("Successfully generated walkthrough"); // Content checks const markdownContent = fs.readFileSync(expectedMarkdownPath, 'utf8').replace(/\r\n/g, '\n'); expect(markdownContent).toContain("# setting up a typescript cli"); expect(markdownContent).toContain("## Copy initial files"); expect(markdownContent).toContain("cp ./walkthrough/00-package.json package.json"); }); }); }); describe("CLI generate with diffs", () => { it("should show diffs when files are overwritten", () => { withTmpDir((tempDir: string) => { fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true }); // Create initial package.json fs.writeFileSync( path.join(tempDir, 'walkthrough/v1-package.json'), `{ "name": "example", "version": "1.0.0", "dependencies": { "typescript": "^5.0.0" } }` ); // Create updated package.json with a new dependency fs.writeFileSync( path.join(tempDir, 
'walkthrough/v2-package.json'), `{ "name": "example", "version": "1.0.0", "dependencies": { "typescript": "^5.0.0", "express": "^4.18.0" } }` ); // Create walkthrough.yaml that updates package.json fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "Test Diff Generation" text: "Testing diff generation for file updates" targets: - markdown: "./walkthrough.md" onChange: diff: true cp: true sections: - title: "Initial Setup" steps: - text: "Create initial package.json" file: {src: ./walkthrough/v1-package.json, dest: package.json} - title: "Add Express" steps: - text: "Add express dependency" file: {src: ./walkthrough/v2-package.json, dest: package.json}` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "walkthrough.yaml")]); }); expect(fs.existsSync(path.join(tempDir, 'walkthrough.md'))).toBe(true); const content = fs.readFileSync(path.join(tempDir, 'walkthrough.md'), 'utf8').replace(/\r\n/g, '\n'); // First file copy should not have a diff (it's new) expect(content).toContain("Create initial package.json"); expect(content).toContain("cp ./walkthrough/v1-package.json package.json"); expect(content).toContain(`
show file \`\`\`json // ./walkthrough/v1-package.json { "name": "example", "version": "1.0.0", "dependencies": { "typescript": "^5.0.0" } } \`\`\`
`); // Second file copy should have a diff (it's an update) expect(content).toContain("Add express dependency"); expect(content).toContain("```diff\npackage.json\n \"version\": \"1.0.0\",\n \"dependencies\": {\n- \"typescript\": \"^5.0.0\"\n+ \"typescript\": \"^5.0.0\",\n+ \"express\": \"^4.18.0\"\n }\n }"); expect(content).toContain(`
skip this step cp ./walkthrough/v2-package.json package.json
`); expect(output).toContain("Successfully generated walkthrough"); }); }); }); describe("CLI generate with folders target", () => { it("should create base folders directory", () => { withTmpDir((tempDir: string) => { fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "Test Folders" text: "Testing folders target" targets: - folders: { path: "./build/by-section" } sections: - title: "First Section" text: "First section text"` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "walkthrough.yaml")]); }); expect(fs.existsSync(path.join(tempDir, 'build/by-section'))).toBe(true); expect(output).toContain("Successfully generated walkthrough"); }); }); it("should create first section folder with README", () => { withTmpDir((tempDir: string) => { fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "Test Folders" text: "Testing folders target" targets: - folders: { path: "./build/by-section" } sections: - name: first-section title: "First Section" text: "First section text"` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "walkthrough.yaml")]); }); const sectionPath = path.join(tempDir, 'build/by-section/00-first-section'); expect(fs.existsSync(sectionPath)).toBe(true); expect(fs.existsSync(path.join(sectionPath, 'README.md'))).toBe(true); // Check README content const readmeContent = fs.readFileSync(path.join(sectionPath, 'README.md'), 'utf8'); expect(readmeContent).toContain("# First Section"); expect(readmeContent).toContain("First section text"); }); }); it("should copy files to the section's working directory", () => { withTmpDir((tempDir: string) => { // Create source file fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true }); fs.writeFileSync( path.join(tempDir, 'walkthrough/file.ts'), 'console.log("hello");' ); // Create walkthrough.yaml fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "Test Folders" text: "Testing folders target" targets: - 
folders: { path: "./build/by-section" } sections: - name: first-section title: "First Section" text: "First section text" steps: - text: "Add a file" file: {src: ./walkthrough/file.ts, dest: src/file.ts}` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "walkthrough.yaml")]); }); // Check source file was copied to section's walkthrough directory const sectionPath = path.join(tempDir, 'build/by-section/00-first-section'); expect(fs.existsSync(path.join(sectionPath, 'walkthrough/file.ts'))).toBe(true); // Check file was NOT copied to its destination within the section // (section folders only contain state BEFORE their own steps) expect(fs.existsSync(path.join(sectionPath, 'src/file.ts'))).toBe(false); // Check README includes the step const readmeContent = fs.readFileSync(path.join(sectionPath, 'README.md'), 'utf8'); expect(readmeContent).toContain("Add a file"); expect(readmeContent).toContain("cp ./walkthrough/file.ts src/file.ts"); }); }); it("should include files from previous sections", () => { withTmpDir((tempDir: string) => { // Create source files fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true }); fs.writeFileSync( path.join(tempDir, 'walkthrough/file1.ts'), 'console.log("hello 1");' ); fs.writeFileSync( path.join(tempDir, 'walkthrough/file2.ts'), 'console.log("hello 2");' ); // Create walkthrough.yaml with two sections fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "Test Folders" text: "Testing folders target" targets: - folders: { path: "./build/by-section" } sections: - name: first-section title: "First Section" text: "First section text" steps: - text: "Add first file" file: {src: ./walkthrough/file1.ts, dest: src/file1.ts} - name: second-section title: "Second Section" text: "Second section text" steps: - text: "Add second file" file: {src: ./walkthrough/file2.ts, dest: src/file2.ts}` ); const output = withMockedConsole(() => { cli(["generate", path.join(tempDir, "walkthrough.yaml")]); 
}); // Check first section does NOT have its own file // (section folders only contain state BEFORE their own steps) const firstSectionPath = path.join(tempDir, 'build/by-section/00-first-section'); expect(fs.existsSync(path.join(firstSectionPath, 'src/file1.ts'))).toBe(false); // Check second section has first section's file but NOT its own file const secondSectionPath = path.join(tempDir, 'build/by-section/01-second-section'); expect(fs.existsSync(path.join(secondSectionPath, 'src/file1.ts'))).toBe(true); expect(fs.existsSync(path.join(secondSectionPath, 'src/file2.ts'))).toBe(false); // Check READMEs const firstReadme = fs.readFileSync(path.join(firstSectionPath, 'README.md'), 'utf8'); expect(firstReadme).toContain("Add first file"); expect(firstReadme).toContain("cp ./walkthrough/file1.ts src/file1.ts"); const secondReadme = fs.readFileSync(path.join(secondSectionPath, 'README.md'), 'utf8'); expect(secondReadme).toContain("Add second file"); expect(secondReadme).toContain("cp ./walkthrough/file2.ts src/file2.ts"); }); }); it("should correctly generate section folders with dir creation and specific file content", () => { withTmpDir((tempDir: string) => { // --- Setup source files --- fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true }); // package.json for hello-world section fs.writeFileSync( path.join(tempDir, 'walkthrough/00-package.json'), JSON.stringify({ name: "hello-world-pkg", dependencies: {} }, null, 2) ); // tsconfig.json for hello-world section fs.writeFileSync( path.join(tempDir, 'walkthrough/00-tsconfig.json'), JSON.stringify({ compilerOptions: { target: "esnext" } }, null, 2) ); // This is the content EXPECTED in hello-world/src/index.ts const expectedHelloWorldIndexContent = 'console.log("hello, world!"); // Simple version'; // The YAML for hello-world section will point to this source file. 
fs.writeFileSync( path.join(tempDir, 'walkthrough/01-index.ts'), // As per user's YAML for hello-world expectedHelloWorldIndexContent ); // This is the content that the user sees INCORRECTLY appearing in hello-world/src/index.ts. // This file won't be directly referenced by the hello-world section in this test's YAML. // If this content appears, it means something is wrong with file sourcing or cumulative logic. const cliIndexContent = 'import { cli } from "./cli"; cli(); // CLI version'; fs.writeFileSync( path.join(tempDir, 'walkthrough/02-index.ts'), // A different file cliIndexContent ); const cliTSContent = 'export function cli() { console.log("cli"); }'; fs.writeFileSync( path.join(tempDir, 'walkthrough/02-cli.ts'), // A different file cliTSContent ); // --- Setup walkthrough.yaml --- const walkthroughYamlContent = ` title: "Test Folders Feature" text: "Testing dir creation and file content isolation between sections." targets: - folders: path: "./build/sections" skip: - "cleanup" final: dirName: "final" sections: - name: cleanup title: "Cleanup Section" steps: - text: "Simulate cleanup (command is illustrative, not run by folders target)" command: "rm -rf src/" - name: hello-world title: "Hello World Section" steps: - text: "Copy package.json" file: {src: ./walkthrough/00-package.json, dest: package.json} - text: "Copy tsconfig.json" file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json} - text: "Create src folder" dir: {create: true, path: src} - text: "Add simple hello world index.ts" file: {src: ./walkthrough/01-index.ts, dest: src/index.ts} # Points to expectedHelloWorldIndexContent - name: cli-version # A subsequent section title: "CLI Version Section" steps: - text: "add a CLI" file: {src: ./walkthrough/02-cli.ts, dest: src/cli.ts} # adds src/cli.ts - text: "Update index.ts to CLI version" file: {src: ./walkthrough/02-index.ts, dest: src/index.ts} # Overwrites src/index.ts - name: runnable title: "run the cli" steps: - text: "run the cli" 
command: "npx tsx src/index.ts" `; fs.writeFileSync(path.join(tempDir, 'walkthrough.yaml'), walkthroughYamlContent); // --- Run CLI --- cli(["generate", path.join(tempDir, "walkthrough.yaml")]); // --- Assertions --- const cleanupSectionPath = path.join(tempDir, 'build/sections/00-cleanup'); const helloWorldSectionPath = path.join(tempDir, 'build/sections/00-hello-world'); const cliSectionPath = path.join(tempDir, 'build/sections/01-cli-version'); const finalSectionPath = path.join(tempDir, 'build/sections/final'); // // Cleanup Section // // cleanup has skip:true so it should not exist expect(fs.existsSync(cleanupSectionPath)).toBe(false); // // Hello World Section // // Assert hello-world section - this should have the results of the previous step (NOTHING) expect(fs.existsSync(helloWorldSectionPath)).toBe(true); // Check package.json and tsconfig.json don't exist yet expect(fs.existsSync(path.join(helloWorldSectionPath, 'src'))).toBe(false); expect(fs.existsSync(path.join(helloWorldSectionPath, 'package.json'))).toBe(false); expect(fs.existsSync(path.join(helloWorldSectionPath, 'tsconfig.json'))).toBe(false); // // CLI Section // // The cli section should contain the results of the hell-world section const packageJSONPath = path.join(cliSectionPath, 'package.json'); const tsconfigJSONPath = path.join(cliSectionPath, 'tsconfig.json'); const indexTSPath = path.join(cliSectionPath, 'src/index.ts'); expect(fs.existsSync(packageJSONPath)).toBe(true); expect(fs.existsSync(tsconfigJSONPath)).toBe(true); expect(fs.existsSync(indexTSPath)).toBe(true); const packageJSONContent = fs.readFileSync(packageJSONPath, 'utf8'); expect(packageJSONContent).toContain("hello-world-pkg"); const tsconfigJSONContent = fs.readFileSync(tsconfigJSONPath, 'utf8'); expect(tsconfigJSONContent).toContain("\"target\": \"esnext\""); const indexTSContent = fs.readFileSync(indexTSPath, 'utf8'); expect(indexTSContent).toContain("console.log(\"hello, world!\");"); // // Final Section // // the final 
folder, marked by "final: dirName: final" should contain all the files from the last section expect(fs.existsSync(finalSectionPath)).toBe(true); expect(fs.existsSync(path.join(finalSectionPath, 'src/index.ts'))).toBe(true); expect(fs.existsSync(path.join(finalSectionPath, 'src/cli.ts'))).toBe(true); expect(fs.existsSync(path.join(finalSectionPath, 'package.json'))).toBe(true); expect(fs.existsSync(path.join(finalSectionPath, 'tsconfig.json'))).toBe(true); // Verify index.ts calls the cli function const finalIndexContent = fs.readFileSync(path.join(finalSectionPath, 'src/index.ts'), 'utf8'); expect(finalIndexContent).toContain(cliIndexContent); const finalCliContent = fs.readFileSync(path.join(finalSectionPath, 'src/cli.ts'), 'utf8'); expect(finalCliContent).toContain(cliTSContent); }); }); it("should execute commands in the working directory for folders target", () => { withTmpDir((tempDir: string) => { // Create walkthrough.yaml fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "Test Command Execution in Folders" text: "Testing command execution" targets: - folders: path: "./build/cmd-test" final: dirName: "final-cmd" sections: - name: section-with-command title: "Section with Command" steps: - text: "Create a file via command" command: "echo 'command content' > command_file.txt" incremental: true - name: next-section title: "Next Section" steps: - text: "Another step" command: "echo 'another' > another_file.txt" incremental: true` ); // Run CLI cli(["generate", path.join(tempDir, "walkthrough.yaml")]); // Assertions const firstSectionPath = path.join(tempDir, 'build/cmd-test/00-section-with-command'); const secondSectionPath = path.join(tempDir, 'build/cmd-test/01-next-section'); const finalPath = path.join(tempDir, 'build/cmd-test/final-cmd'); // First section should NOT have its own command's file expect(fs.existsSync(path.join(firstSectionPath, 'command_file.txt'))).toBe(false); // Second section SHOULD have first section's command's file 
expect(fs.existsSync(path.join(secondSectionPath, 'command_file.txt'))).toBe(true); // But should NOT have its own command's file expect(fs.existsSync(path.join(secondSectionPath, 'another_file.txt'))).toBe(false); // Final folder should have both files expect(fs.existsSync(path.join(finalPath, 'command_file.txt'))).toBe(true); expect(fs.existsSync(path.join(finalPath, 'another_file.txt'))).toBe(true); // Check file contents const commandFileContent = fs.readFileSync(path.join(secondSectionPath, 'command_file.txt'), 'utf8').trim(); expect(commandFileContent).toBe('command content'); const finalAnotherFileContent = fs.readFileSync(path.join(finalPath, 'another_file.txt'), 'utf8').trim(); expect(finalAnotherFileContent).toBe('another'); }); }); it("should handle incremental commands correctly", () => { withTmpDir((tempDir: string) => { // Create walkthrough.yaml fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "Test Incremental Commands" text: "Testing incremental command behavior" targets: - markdown: "./walkthrough.md" - folders: path: "./build/cmd-test" final: dirName: "final" sections: - name: section-with-commands title: "Section with Commands" steps: - text: "Regular command (not executed in folders, shown in MD)" command: "echo 'regular command' > regular.txt" - text: "Incremental command (executed in folders, shown in MD)" command: "echo 'incremental command' > incremental.txt" incremental: true - text: "Another regular command (not executed in folders, shown in MD)" command: "echo 'another regular' > another_regular.txt" incremental: false` ); // Run CLI cli(["generate", path.join(tempDir, "walkthrough.yaml")]); // Check markdown output - ALL commands should be in markdown const markdownContent = fs.readFileSync(path.join(tempDir, 'walkthrough.md'), 'utf8'); expect(markdownContent).toContain("echo 'regular command' > regular.txt"); expect(markdownContent).toContain("echo 'incremental command' > incremental.txt"); 
expect(markdownContent).toContain("echo 'another regular' > another_regular.txt"); // Check folders output - only incremental commands should have run const finalPath = path.join(tempDir, 'build/cmd-test/final'); expect(fs.existsSync(path.join(finalPath, 'regular.txt'))).toBe(false); expect(fs.existsSync(path.join(finalPath, 'incremental.txt'))).toBe(true); expect(fs.existsSync(path.join(finalPath, 'another_regular.txt'))).toBe(false); // Check file contents for incremental command const incrementalContent = fs.readFileSync(path.join(finalPath, 'incremental.txt'), 'utf8').trim(); expect(incrementalContent).toBe('incremental command'); }); }); it("should generate section READMEs with diffs and show file blocks", () => { withTmpDir((tempDir: string) => { // Create source files fs.mkdirSync(path.join(tempDir, 'walkthrough'), { recursive: true }); fs.writeFileSync( path.join(tempDir, 'walkthrough/v1-index.ts'), 'console.log("hello");' ); fs.writeFileSync( path.join(tempDir, 'walkthrough/v2-index.ts'), 'console.log("hello");\nconsole.log("world");' ); // Create walkthrough.yaml fs.writeFileSync( path.join(tempDir, 'walkthrough.yaml'), `title: "Test Section README Diffs" text: "Testing section README diff generation" targets: - folders: path: "./build/sections" final: dirName: "final" onChange: diff: true cp: true newFiles: cat: false cp: true sections: - name: first-section title: "First Section" text: "First section text" steps: - text: "Add initial index.ts" file: {src: ./walkthrough/v1-index.ts, dest: src/index.ts} - name: second-section title: "Second Section" text: "Second section text" steps: - text: "Update index.ts" file: {src: ./walkthrough/v2-index.ts, dest: src/index.ts}` ); // Run CLI cli(["generate", path.join(tempDir, "walkthrough.yaml")]); // Check first section README const firstSectionPath = path.join(tempDir, 'build/sections/00-first-section'); const firstReadme = fs.readFileSync(path.join(firstSectionPath, 'README.md'), 'utf8'); 
expect(firstReadme).toContain("Add initial index.ts"); expect(firstReadme).toContain("cp ./walkthrough/v1-index.ts src/index.ts"); expect(firstReadme).toContain("
\nshow file"); expect(firstReadme).toContain("```ts\n// ./walkthrough/v1-index.ts"); expect(firstReadme).toContain('console.log("hello");'); // Check second section README const secondSectionPath = path.join(tempDir, 'build/sections/01-second-section'); const secondReadme = fs.readFileSync(path.join(secondSectionPath, 'README.md'), 'utf8'); expect(secondReadme).toContain("Update index.ts"); expect(secondReadme).toContain("```diff\nsrc/index.ts\n+console.log(\"world\");"); expect(secondReadme).toContain("
\nskip this step"); expect(secondReadme).toContain("cp ./walkthrough/v2-index.ts src/index.ts"); }); }); });
================================================
FILE: packages/walkthroughgen/test/utils/console-mock.ts
================================================
/**
 * A utility function to mock console.log and console.error and capture their output
 * @param callback The function to execute while console is mocked
 * @returns The captured console output (both log and error messages)
 */
export const withMockedConsole = (callback: () => void): string => {
  const originalConsoleLog = console.log;
  const originalConsoleError = console.error;
  // Log and error output share one buffer, in call order.
  const capturedOutput: string[] = [];

  console.log = (...args: any[]) => {
    capturedOutput.push(args.join(" "));
  };
  console.error = (...args: any[]) => {
    capturedOutput.push(args.join(" "));
  };

  try {
    callback();
  } finally {
    // Restore the real console even when the callback throws.
    console.log = originalConsoleLog;
    console.error = originalConsoleError;
  }

  return capturedOutput.join("\n");
};
================================================
FILE: packages/walkthroughgen/test/utils/temp-dir.ts
================================================
import { mkdtempSync, rmSync } from 'fs';
import { tmpdir } from 'os';
import { join } from 'path';

/**
 * Creates a temporary directory, executes a function with that directory, then removes it
 *
 * The directory is created next to this module (prefix `.tmptest`) and is
 * removed in a `finally`, so it is cleaned up whether `fn` returns or throws.
 * Cleanup happens as soon as `fn` returns: if `fn` returns a promise, the
 * directory is already gone by the time that promise settles.
 *
 * @param fn Callback that receives the path of the temporary directory
 * @returns Whatever `fn` returns
 */
export function withTmpDir<T>(fn: (dir: string) => T): T {
  const dir = mkdtempSync(join(__dirname, '.tmptest'));
  try {
    return fn(dir);
  } finally {
    rmSync(dir, { recursive: true, force: true });
  }
}
================================================
FILE: packages/walkthroughgen/tsconfig.json
================================================
{
  "compilerOptions": {
    "target": "es2016",
    "module": "commonjs",
    "esModuleInterop": true,
    "forceConsistentCasingInFileNames": true,
    "strict": true,
    "skipLibCheck": true
  },
  "exclude": ["node_modules", "dist", "**/walkthrough/**"]
}
================================================
FILE: workshops/.gitignore
================================================
baml_client/
================================================
FILE: workshops/.python-version
================================================
3.11
================================================
FILE: workshops/2025-05/.gitignore
================================================
build/
================================================
FILE: workshops/2025-05/Makefile
================================================
.PHONY: clean
clean:
	rm -rf build/

.PHONY: generate
generate: clean
	npm -C ../../packages/walkthroughgen/ \
		exec tsx \
		../../packages/walkthroughgen/src/index.ts \
		generate walkthrough.yaml
================================================
FILE: workshops/2025-05/final/.gitignore
================================================
baml_client/
node_modules/
================================================
FILE: workshops/2025-05/final/baml_src/agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

class DoneForNow {
  intent "done_for_now"
  message string @description(#" message to send to the user about the work that was done. "#)
}

// Given the serialized thread, asks the model for the next step: either a
// message to the human (HumanTools) or a calculator tool call (CalculatorTools).
function DetermineNextStep(
  thread: string
) -> HumanTools | CalculatorTools {
  client "openai/gpt-4o"

  prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #" hello! "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #" can you multiply 3 and 4? 
"#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

test LongMath {
  functions [DetermineNextStep]
  args {
    thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}

test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #" can you multiply 3 and fe1iiaff10 "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #" can you multiply 3 and FD*(#F&& ? message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? lets try 12 instead "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // Each assertion label names the field it checks, so a failure report points at the right operand.
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
================================================
FILE: workshops/2025-05/final/baml_src/clients.baml
================================================
// Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview

client<llm> CustomGPT4o {
  provider openai
  options {
    model "gpt-4o"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomGPT4oMini {
  provider openai
  retry_policy Exponential
  options {
    model "gpt-4o-mini"
    api_key env.OPENAI_API_KEY
  }
}

client<llm> CustomSonnet {
  provider anthropic
  options {
    model "claude-3-5-sonnet-20241022"
    api_key env.ANTHROPIC_API_KEY
  }
}

client<llm> CustomHaiku {
  provider anthropic
  retry_policy Constant
  options {
    model "claude-3-haiku-20240307"
    api_key env.ANTHROPIC_API_KEY
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/round-robin
client<llm> CustomFast {
  provider round-robin
  options {
    // This will alternate between the two clients
    strategy [CustomGPT4oMini, CustomHaiku]
  }
}

// https://docs.boundaryml.com/docs/snippets/clients/fallback
client<llm> OpenaiFallback {
  provider fallback
  options {
    // This will try
the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/final/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/final/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/final/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "express": "^5.1.0", "humanlayer": "^0.7.7", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/express": "^5.0.1", "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0", "supertest": "^7.1.0" } } ================================================ FILE: workshops/2025-05/final/src/agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? 
e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) }

    // True when the last event is a step addressed to the human
    // ("request_more_information" or "done_for_now").
    // An empty thread, or a last event without data, yields false instead of throwing.
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        const intent = lastEvent?.data?.intent;
        return intent === 'request_more_information' || intent === 'done_for_now';
    }

    // True when the last event is a "divide" step, the one tool agentLoop hands back
    // unexecuted. An empty thread, or a last event without data, yields false.
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

// Runs one calculator tool call and appends the numeric result to the thread as a
// "tool_response" event.
//
// nextStep: the tool call; `intent` selects the operation applied to `a` and `b`
// thread:   the conversation, mutated in place
// returns:  the same thread instance
// throws:   Error when `intent` is not one of the four calculator operations
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            // no zero check here: a zero divisor produces Infinity or NaN, recorded as-is
            result = nextStep.a / nextStep.b;
            break;
        default:
            // unreachable for well-typed callers; fail loudly for untyped input rather
            // than returning undefined in place of the thread
            throw new Error(`Unknown intent: ${(nextStep as { intent: string }).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

// Drives the agent: asks the model for the next step, records it on the thread as a
// "tool_call" event, then either executes it and loops or returns to the caller.
//
// Returns on "done_for_now" / "request_more_information" (the step is addressed to the
// human) and on "divide" (left unexecuted for approval). "add", "subtract" and
// "multiply" are executed immediately and the loop continues.
//
// thread:  the conversation so far; events are appended in place
// returns: the thread, whose last event is the step that stopped the loop
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}

================================================ FILE: workshops/2025-05/final/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

// Entry point for the command line: joins the arguments into one message, runs the
// agent loop, and keeps asking the human and re-running the loop until the last
// event's intent is "done_for_now". Prints that event's message and exits with 0;
// exits with 1 when no arguments were given.
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        thread.events.push(responseEvent);
        newThread = await agentLoop(thread);
        lastEvent = newThread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

// Routes the question to email when HUMANLAYER_API_KEY is set, otherwise to the terminal.
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}

// Prints `message` followed by a "> " prompt and resolves with the typed answer
// wrapped as a "human_response" event.
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // cli() calls this once per loop turn; close the interface so they do not
            // pile up on the same stdin/stdout pair
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
email: { address: process.env.HUMANLAYER_EMAIL, // custom email body - jinja template: `{% if type == 'request_more_information' %} {{ event.spec.msg }} {% else %} agent {{ event.run_id }} is requesting approval for {{event.spec.fn}} with args: {{event.spec.kwargs}}

reply to this email to approve {% endif %}` } } }) if (lastEvent.data.intent === "request_more_information") { // fetch response synchronously - this will block until reply const response = await hl.fetchHumanResponse({ spec: { msg: lastEvent.data.message } }) return { "type": "tool_response", "data": response } } if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply const response = await hl.fetchHumanApproval({ spec: { fn: "divide", kwargs: { a: lastEvent.data.a, b: lastEvent.data.b } } }) if (response.approved) { const result = lastEvent.data.a / lastEvent.data.b; console.log("tool_response", result); return { "type": "tool_response", "data": result }; } else { return { "type": "tool_response", "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}` }; } } throw new Error(`unknown tool: ${lastEvent.data.intent}`) } ================================================ FILE: workshops/2025-05/final/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/final/src/server.ts ================================================ import express from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so 
the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type ApprovalPayload = { type: "approval"; approved: boolean; comment?: string; } type ResponsePayload = { type: "response"; response: string; } type Payload = ApprovalPayload | ResponsePayload; // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } const body: Payload = req.body; let lastEvent = thread.events[thread.events.length - 1]; if (thread.awaitingHumanResponse() && body.type === 'response') { thread.events.push({ type: "human_response", data: body.response }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { // push feedback onto the thread thread.events.push({ type: "tool_response", data: `user denied the operation with feedback: "${body.comment}"` }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { // approved, run the tool, pushing results onto the thread await handleNextStep(lastEvent.data, thread); } else { res.status(400).json({ error: "Invalid request: " + body.type, awaitingHumanResponse: thread.awaitingHumanResponse(), awaitingHumanApproval: thread.awaitingHumanApproval() }); return; } // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event 
from endpoint", lastEvent); res.json(newThread); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/final/src/state.ts ================================================ import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. redis, sqlite, postgres, etc export class ThreadStore { private threads: Map = new Map(); create(thread: Thread): string { const id = crypto.randomUUID(); this.threads.set(id, thread); return id; } get(id: string): Thread | undefined { return this.threads.get(id); } update(id: string, thread: Thread): void { this.threads.set(id, thread); } } ================================================ FILE: workshops/2025-05/final/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/00-hello-world/README.md ================================================ # Chapter 0 - Hello World Let's start with a basic TypeScript setup and a hello world program. This guide is written in TypeScript (yes, a python version is coming soon) There are many checkpoints between every file edit in the workshop steps, so even if you aren't super familiar with typescript, you should be able to keep up and run each example.
To run this guide, you'll need a relatively recent version of nodejs and npm installed You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine brew install node@20 You should see the node version node --version Copy initial package.json cp ./walkthrough/00-package.json package.json
show file ```json // ./walkthrough/00-package.json { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ```
Install dependencies npm install Copy tsconfig.json cp ./walkthrough/00-tsconfig.json tsconfig.json
show file ```json // ./walkthrough/00-tsconfig.json { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ```
add .gitignore cp ./walkthrough/00-.gitignore .gitignore
show file ```gitignore // ./walkthrough/00-.gitignore baml_client/ node_modules/ ```
Create src folder mkdir -p src Add a simple hello world index.ts cp ./walkthrough/00-index.ts src/index.ts
show file ```ts // ./walkthrough/00-index.ts async function hello(): Promise<void> { console.log('hello, world!') } async function main() { await hello() } main().catch(console.error) ```
Run it to verify npx tsx src/index.ts You should see: hello, world! ================================================ FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-index.ts ================================================ async function hello(): Promise { console.log('hello, world!') } async function main() { await hello() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05/sections/00-hello-world/walkthrough/00-tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/01-cli-and-agent/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: 
workshops/2025-05/sections/01-cli-and-agent/README.md ================================================ # Chapter 1 - CLI and Agent Loop Now let's add BAML and create our first agent with a CLI interface. First, we'll need to install [BAML](https://github.com/boundaryml/baml) which is a tool for prompting and structured outputs. npm install @boundaryml/baml Initialize BAML npx baml-cli init Remove default resume.baml rm baml_src/resume.baml Add our starter agent, a single baml prompt that we'll build on cp ./walkthrough/01-agent.baml baml_src/agent.baml
show file ```rust // ./walkthrough/01-agent.baml class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ```
Generate BAML client code npx baml-cli generate Enable BAML logging for this section export BAML_LOG=debug Add the CLI interface cp ./walkthrough/01-cli.ts src/cli.ts
show file ```ts // ./walkthrough/01-cli.ts // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ```
Update index.ts to use the CLI ```diff src/index.ts +import { cli } from "./cli" + async function hello(): Promise<void> { console.log('hello, world!') async function main() { - await hello() + await cli() } ```
skip this step cp ./walkthrough/01-index.ts src/index.ts
Add the agent implementation cp ./walkthrough/01-agent.ts src/agent.ts
show file ```ts // ./walkthrough/01-agent.ts import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } // right now this just runs one turn with the LLM, but // we'll update this function to handle all the agent logic export async function agentLoop(thread: Thread): Promise<AgentResponse> { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); return nextStep; } ```
The BAML code is configured to use OPENAI_API_KEY by default As you're testing, you can change the model / provider to something else as you please client "openai/gpt-4o" [Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms) For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider. If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key. export OPENAI_API_KEY=... Try it out npx tsx src/index.ts hello you should see a familiar response from the model { intent: 'done_for_now', message: 'Hello! How can I assist you today?' } ================================================ FILE: workshops/2025-05/sections/01-cli-and-agent/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05/sections/01-cli-and-agent/src/index.ts ================================================ async function hello(): Promise { console.log('hello, world!') } async function main() { await hello() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/01-cli-and-agent/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules":
true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } // right now this just runs one turn with the LLM, but // we'll update this function to handle all the agent logic export async function agentLoop(thread: Thread): Promise { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); return nextStep; } ================================================ FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ================================================ FILE: workshops/2025-05/sections/01-cli-and-agent/walkthrough/01-index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/README.md ================================================ # Chapter 2 - Add Calculator 
Tools Let's add some calculator tools to our agent. Let's start by adding a tool definition for the calculator. These are simple structured outputs that we'll ask the model to return as a "next step" in the agentic loop. cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
show file ```rust // ./walkthrough/02-tool_calculator.baml type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ```
Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps ```diff baml_src/agent.baml function DetermineNextStep( thread: string -) -> DoneForNow { +) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" ```
skip this step cp ./walkthrough/02-agent.baml baml_src/agent.baml
Generate updated BAML client npx baml-cli generate Try out the calculator npx tsx src/index.ts 'can you add 3 and 4' You should see a tool call to the calculator { intent: 'add', a: 3, b: 4 } ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/baml_src/agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, 
CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/src/agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } // right now this just runs one turn with the LLM, but // we'll update this function to handle all the agent logic export async function agentLoop(thread: Thread): Promise { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); return nextStep; } ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": 
true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/walkthrough/02-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05/sections/02-calculator-tools/walkthrough/02-tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/03-tool-loop/README.md ================================================ # Chapter 3 - Process Tool Calls in a Loop Now let's add a real agentic loop that can run the tools and get a final answer from the LLM. 
First, let's update the agent to handle the tool call ```diff src/agent.ts } -// right now this just runs one turn with the LLM, but -// we'll update this function to handle all the agent logic -export async function agentLoop(thread: Thread): Promise<AgentResponse> { - const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); - return nextStep; + + +export async function agentLoop(thread: Thread): Promise<string> { + + while (true) { + const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); + console.log("nextStep", nextStep); + + switch (nextStep.intent) { + case "done_for_now": + // response to human, return the next step object + return nextStep.message; + case "add": + thread.events.push({ + "type": "tool_call", + "data": nextStep + }); + const result = nextStep.a + nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + continue; + default: + throw new Error(`Unknown intent: ${nextStep.intent}`); + } + } } ```
skip this step cp ./walkthrough/03-agent.ts src/agent.ts
Now, let's try it out npx tsx src/index.ts 'can you add 3 and 4' you should see the agent call the tool and then return the result { intent: 'done_for_now', message: 'The sum of 3 and 4 is 7.' } For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output export BAML_LOG=off Try a multi-step calculation npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result' you'll notice that tools like multiply and divide are not available npx tsx src/index.ts 'can you multiply 3 and 4' next, let's add handlers for the rest of the calculator tools ```diff src/agent.ts -import { b } from "../baml_client"; +import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; -// tool call or a respond to human tool -type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>; - export interface Event { type: string } +export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; +export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> { + let result: number; + switch (nextStep.intent) { + case "add": + result = nextStep.a + nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "subtract": + result = nextStep.a - nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "multiply": + result = nextStep.a * nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "divide": + result = nextStep.a / nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + } +} export async function agentLoop(thread: Thread): Promise<string> { console.log("nextStep", nextStep); + thread.events.push({ + "type": "tool_call", + "data": nextStep + }); + switch
(nextStep.intent) { case "done_for_now": return nextStep.message; case "add": - thread.events.push({ - "type": "tool_call", - "data": nextStep - }); - const result = nextStep.a + nextStep.b; - console.log("tool_response", result); - thread.events.push({ - "type": "tool_response", - "data": result - }); - continue; - default: - throw new Error(`Unknown intent: ${nextStep.intent}`); + case "subtract": + case "multiply": + case "divide": + thread = await handleNextStep(nextStep, thread); } } ```
skip this step cp ./walkthrough/03b-agent.ts src/agent.ts
Test subtraction npx tsx src/index.ts 'can you subtract 3 from 4' now, let's test the multiplication tool npx tsx src/index.ts 'can you multiply 3 and 4' finally, let's test a more complex calculation with multiple operations npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' ================================================ FILE: workshops/2025-05/sections/03-tool-loop/baml_src/agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback 
client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/src/agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } // right now this just runs one turn with the LLM, but // we'll update this function to handle all the agent logic export async function agentLoop(thread: Thread): Promise { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); return nextStep; } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/03-tool-loop/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, 
"isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/walkthrough/03-agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); switch (nextStep.intent) { case "done_for_now": // response to human, return the next step object return nextStep.message; case "add": thread.events.push({ "type": "tool_call", "data": nextStep }); const result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); continue; default: throw new Error(`Unknown intent: ${nextStep.intent}`); } } } ================================================ FILE: workshops/2025-05/sections/03-tool-loop/walkthrough/03b-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom 
serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } }

// Union of the calculator tool calls that handleNextStep knows how to execute.
export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Executes a single calculator tool call and records its outcome on the thread.
 *
 * The arithmetic result is logged and appended to `thread.events` as a
 * "tool_response" event. The thread is mutated in place and the same
 * instance is returned.
 *
 * @param nextStep - the tool call to execute; `intent` selects the operation
 * @param thread - the conversation thread that receives the "tool_response" event
 * @returns the same `thread`, with the new event appended
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            console.log("tool_response", result);
            thread.events.push({
                "type": "tool_response",
                "data": result
            });
            return thread;
        case "subtract":
            result = nextStep.a - nextStep.b;
            console.log("tool_response", result);
            thread.events.push({
                "type": "tool_response",
                "data": result
            });
            return thread;
        case "multiply":
            result = nextStep.a * nextStep.b;
            console.log("tool_response", result);
            thread.events.push({
                "type": "tool_response",
                "data": result
            });
            return thread;
        case "divide":
            // NOTE(review): a zero divisor yields Infinity or NaN here, which
            // JSON.stringify (used by serializeForLLM) writes out as null.
            result = nextStep.a / nextStep.b;
            console.log("tool_response", result);
            thread.events.push({
                "type": "tool_response",
                "data": result
            });
            return thread;
    }
}

/**
 * Runs the agent until the model signals it is done.
 *
 * Each iteration asks `b.DetermineNextStep` for the next step based on the
 * serialized thread, logs it, and appends it to the thread as a "tool_call"
 * event (this happens for every step, including "done_for_now").
 *
 * @param thread - the conversation thread; events are appended as the loop runs
 * @returns the `message` of the "done_for_now" step
 */
export async function agentLoop(thread: Thread): Promise<string> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
                // reply to the human: return the final message text
                return nextStep.message;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                // run the calculator tool, then loop so the model sees the result
                thread = await handleNextStep(nextStep, thread);
        }
    }
}
================================================ FILE: workshops/2025-05/sections/04-baml-tests/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/04-baml-tests/README.md ================================================ # Chapter 4 - Add Tests to agent.baml Let's add some tests to our BAML agent.
to start, leave the baml logs enabled export BAML_LOG=debug next, let's add some tests to the agent We'll start with a simple test that checks the agent's ability to handle a basic calculation. ```diff baml_src/agent.baml "# } + +test MathOperation { + functions [DetermineNextStep] + args { + thread #" + { + "type": "user_input", + "data": "can you multiply 3 and 4?" + } + "# + } +} + ```
skip this step cp ./walkthrough/04-agent.baml baml_src/agent.baml
Run the tests npx baml-cli test now, let's improve the test with assertions! Assertions are a great way to make sure the agent is working as expected, and can easily be extended to check for more complex behavior. ```diff baml_src/agent.baml "# } + @@assert(hello, {{this.intent == "done_for_now"}}) } "# } + @@assert(math_operation, {{this.intent == "multiply"}}) } ```
skip this step cp ./walkthrough/04b-agent.baml baml_src/agent.baml
Run the tests npx baml-cli test as you add more tests, you can disable the logs to keep the output clean. You may want to turn them on as you iterate on specific tests. export BAML_LOG=off now, let's add some more complex test cases, where we resume from in the middle of an in-progress agentic context window ```diff baml_src/agent.baml "# } - @@assert(hello, {{this.intent == "done_for_now"}}) + @@assert(intent, {{this.intent == "done_for_now"}}) } "# } - @@assert(math_operation, {{this.intent == "multiply"}}) + @@assert(intent, {{this.intent == "multiply"}}) } +test LongMath { + functions [DetermineNextStep] + args { + thread #" + [ + { + "type": "user_input", + "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" + }, + { + "type": "tool_call", + "data": { + "intent": "multiply", + "a": 3, + "b": 4 + } + }, + { + "type": "tool_response", + "data": 12 + }, + { + "type": "tool_call", + "data": { + "intent": "divide", + "a": 12, + "b": 2 + } + }, + { + "type": "tool_response", + "data": 6 + }, + { + "type": "tool_call", + "data": { + "intent": "add", + "a": 6, + "b": 12 + } + }, + { + "type": "tool_response", + "data": 18 + } + ] + "# + } + @@assert(intent, {{this.intent == "done_for_now"}}) + @@assert(answer, {{"18" in this.message}}) +} + ```
skip this step cp ./walkthrough/04c-agent.baml baml_src/agent.baml
let's try to run it npx baml-cli test ================================================ FILE: workshops/2025-05/sections/04-baml-tests/baml_src/agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type 
constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/src/agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": // response to human, return the next step object return nextStep.message; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: 
Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/04-baml-tests/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/walkthrough/04-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" 
} "# } } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/walkthrough/04b-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(hello, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(math_operation, {{this.intent == "multiply"}}) } ================================================ FILE: workshops/2025-05/sections/04-baml-tests/walkthrough/04c-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" 
} "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" }, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } ================================================ FILE: workshops/2025-05/sections/05-human-tools/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/05-human-tools/README.md ================================================ # Chapter 5 - Multiple Human Tools In this section, we'll add support for multiple tools that serve to contact humans. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off first, let's add a tool that can request clarification from a human this will be different from the "done_for_now" tool, and can be used to more flexibly handle different types of human interactions in your agent. ```diff baml_src/agent.baml +// human tools are async requests to a human +type HumanTools = ClarificationRequest | DoneForNow + +class ClarificationRequest { + intent "request_more_information" @description("you can request more information from me") + message string +} + class DoneForNow { intent "done_for_now" - message string + + message string @description(#" + message to send to the user about the work that was done. 
+ "#) } function DetermineNextStep( thread: string -) -> CalculatorTools | DoneForNow { +) -> HumanTools | CalculatorTools { client "openai/gpt-4o" } + ```
skip this step cp ./walkthrough/05-agent.baml baml_src/agent.baml
next, let's re-generate the client code NOTE - if you're using the VSCode extension for BAML, the client will be regenerated automatically when you save the file in your editor. npx baml-cli generate now, let's update the agent to use the new tool ```diff src/agent.ts } -export async function agentLoop(thread: Thread): Promise<string> { +export async function agentLoop(thread: Thread): Promise<Thread> { while (true) { switch (nextStep.intent) { case "done_for_now": - // response to human, return the next step object - return nextStep.message; + case "request_more_information": + // response to human, return the thread + return thread; case "add": case "subtract": ```
skip this step cp ./walkthrough/05-agent.ts src/agent.ts
next, let's update the CLI to handle clarification requests by requesting input from the user on the CLI ```diff src/cli.ts // cli.ts lets you invoke the agent loop from the command line -import { agentLoop, Thread, Event } from "./agent"; +import { agentLoop, Thread, Event } from "../src/agent"; + + export async function cli() { // Get command line arguments, skipping the first two (node and script name) // Run the agent loop with the thread const result = await agentLoop(thread); - console.log(result); + let lastEvent = result.events.slice(-1)[0]; + + while (lastEvent.data.intent === "request_more_information") { + const message = await askHuman(lastEvent.data.message); + thread.events.push({ type: "human_response", data: message }); + const result = await agentLoop(thread); + lastEvent = result.events.slice(-1)[0]; + } + + // print the final result + // optional - you could loop here too + console.log(lastEvent.data.message); + process.exit(0); } + +async function askHuman(message: string) { + const readline = require('readline').createInterface({ + input: process.stdin, + output: process.stdout + }); + + return new Promise((resolve) => { + readline.question(`${message}\n> `, (answer: string) => { + resolve(answer); + }); + }); +} ```
skip this step cp ./walkthrough/05-cli.ts src/cli.ts
let's try it out npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& ' next, let's add a test that checks the agent's ability to handle a clarification request ```diff baml_src/agent.baml + +test MathOperationWithClarification { + functions [DetermineNextStep] + args { + thread #" + [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] + "# + } + @@assert(intent, {{this.intent == "request_more_information"}}) +} + +test MathOperationPostClarification { + functions [DetermineNextStep] + args { + thread #" + [ + {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, + {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}}, + {"type":"human_response","data":"lets try 12 instead"}, + ] + "# + } + @@assert(intent, {{this.intent == "multiply"}}) + @@assert(a, {{this.b == 12}}) + @@assert(b, {{this.a == 3}}) +} + + + ```
skip this step cp ./walkthrough/05b-agent.baml baml_src/agent.baml
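and now we can run the tests again npx baml-cli test you'll notice the new tests pass, but the hello world test fails. This is because that test still asserts the previous default behavior of returning "done_for_now"; now that a clarification tool is available, the agent responds to a bare "hello!" with "request_more_information" instead, so let's update the assertion to match ```diff baml_src/agent.baml "# } - @@assert(intent, {{this.intent == "done_for_now"}}) + @@assert(intent, {{this.intent == "request_more_information"}}) } ```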
skip this step cp ./walkthrough/05c-agent.baml baml_src/agent.baml
Verify tests pass npx baml-cli test ================================================ FILE: workshops/2025-05/sections/05-human-tools/baml_src/agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" 
}, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } ================================================ FILE: workshops/2025-05/sections/05-human-tools/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: 
workshops/2025-05/sections/05-human-tools/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.202.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). default_client_mode async } ================================================ FILE: workshops/2025-05/sections/05-human-tools/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/05-human-tools/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "@boundaryml/baml": "latest", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: 
workshops/2025-05/sections/05-human-tools/src/agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

// One entry in a thread's history: a `type` tag plus an arbitrary payload.
export interface Event {
    type: string
    data: any;
}

// Ordered list of events for one conversation. The serialized form of this
// list is what agentLoop passes to the model on every iteration.
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    // Returns the event list as a JSON string.
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        return JSON.stringify(this.events);
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute a single calculator tool call and record its result on the thread.
 *
 * The arithmetic is selected by `nextStep.intent`; the result is logged and
 * appended to `thread.events` as a "tool_response" event.
 *
 * NOTE: "divide" performs a plain `a / b`, so a zero divisor produces
 * Infinity or NaN, which JSON.stringify later writes out as `null`.
 *
 * @param nextStep the calculator tool call chosen by the model
 * @param thread   the thread to append the result to (mutated in place)
 * @returns the same thread instance that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            console.log("tool_response", result);
            thread.events.push({
                "type": "tool_response",
                "data": result
            });
            return thread;
        case "subtract":
            result = nextStep.a - nextStep.b;
            console.log("tool_response", result);
            thread.events.push({
                "type": "tool_response",
                "data": result
            });
            return thread;
        case "multiply":
            result = nextStep.a * nextStep.b;
            console.log("tool_response", result);
            thread.events.push({
                "type": "tool_response",
                "data": result
            });
            return thread;
        case "divide":
            result = nextStep.a / nextStep.b;
            console.log("tool_response", result);
            thread.events.push({
                "type": "tool_response",
                "data": result
            });
            return thread;
    }
}

/**
 * Drive the agent until the model reports it is done.
 *
 * Each iteration asks the model for the next step, logs it, records it on the
 * thread as a "tool_call" event, and then either returns or runs the
 * requested calculator tool and loops again.
 *
 * @param thread the conversation so far (events are appended as the loop runs)
 * @returns the `message` of the model's "done_for_now" step
 */
export async function agentLoop(thread: Thread): Promise<string> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
                // response to human, return the next step object
                return nextStep.message;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
        }
    }
}
================================================ FILE: workshops/2025-05/sections/05-human-tools/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ================================================ FILE: workshops/2025-05/sections/05-human-tools/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/05-human-tools/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05-agent.baml ================================================ // human tools are async 
requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" 
}, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } ================================================ FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); 
return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05-cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); let lastEvent = result.events.slice(-1)[0]; while (lastEvent.data.intent === "request_more_information") { const message = await askHuman(lastEvent.data.message); thread.events.push({ type: "human_response", data: message }); const result = await agentLoop(thread); lastEvent = result.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } async function askHuman(message: string) { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, 
(answer: string) => { resolve(answer); }); }); } ================================================ FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05b-agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" 
}, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" [ {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}}, {"type":"human_response","data":"lets try 12 instead"}, ] "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(a, {{this.b == 12}}) @@assert(b, {{this.a == 3}}) } ================================================ FILE: workshops/2025-05/sections/05-human-tools/walkthrough/05c-agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. 
{{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" }, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" [ {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. 
Could you please clarify or provide the correct numbers you would like to multiply?"}}, {"type":"human_response","data":"lets try 12 instead"}, ] "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(a, {{this.b == 12}}) @@assert(b, {{this.a == 3}}) } ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/README.md ================================================ # Chapter 6 - Customize Your Prompt with Reasoning In this section, we'll explore how to customize the prompt of the agent with reasoning steps. this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md) there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts) for this section, it will be helpful to leave the baml logs enabled export BAML_LOG=debug update the agent prompt to include a reasoning step ```diff baml_src/agent.baml {{ ctx.output_format }} + + First, always plan out what to do next, for example: + + - ... + - ... + - ... + + {...} // schema "# } @@assert(b, {{this.a == 3}}) } - - ```
skip this step cp ./walkthrough/06-agent.baml baml_src/agent.baml
generate the updated client npx baml-cli generate now, you can try it out with a simple prompt npx tsx src/index.ts 'can you multiply 3 and 4' you should see output from the baml logs showing the reasoning steps #### optional challenge add a field to your tool output format that includes the reasoning steps in the output! ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" 
}, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" [ {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}}, {"type":"human_response","data":"lets try 12 instead"}, ] "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(a, {{this.b == 12}}) @@assert(b, {{this.a == 3}}) } ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // 
https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/src/agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { 
console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); let lastEvent = result.events.slice(-1)[0]; while (lastEvent.data.intent === "request_more_information") { const message = await askHuman(lastEvent.data.message); thread.events.push({ type: "human_response", data: message }); const result = await agentLoop(thread); lastEvent = result.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); }

/**
 * Prompt the human on the terminal and resolve with the line they type.
 *
 * A fresh readline interface is created for every question and closed as soon
 * as the answer arrives, so repeated calls do not stack interfaces on
 * process.stdin.
 *
 * @param message text shown to the human, followed by a "> " prompt on a new line
 * @returns a promise resolving to the human's answer
 */
async function askHuman(message: string) {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // release stdin before handing the answer back
            rl.close();
            resolve(answer);
        });
    });
}
================================================ FILE: workshops/2025-05/sections/06-customize-prompt/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 
"exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/06-customize-prompt/walkthrough/06-agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} First, always plan out what to do next, for example: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" 
}, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" [ {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}}, {"type":"human_response","data":"lets try 12 instead"}, ] "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(a, {{this.b == 12}}) @@assert(b, {{this.a == 3}}) } ================================================ FILE: workshops/2025-05/sections/07-context-window/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/07-context-window/README.md ================================================ # Chapter 7 - Customize Your Context Window In this section, we'll explore how to customize the context window of the agent. 
This is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md).

Update the agent to pretty-print the context window for the model:

```diff
src/agent.ts
         // can change this to whatever custom serialization you want to do, XML, etc
         // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
-        return JSON.stringify(this.events);
+        return JSON.stringify(this.events, null, 2);
     }
 }
```
To skip this step, copy the finished file instead:

    cp ./walkthrough/07-agent.ts src/agent.ts
Test the formatting:

    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

Next, let's update the agent to use XML formatting instead. This is a very popular format for passing data to a model, among other things because of the token efficiency of XML.

```diff
src/agent.ts
     serializeForLLM() {
-        // can change this to whatever custom serialization you want to do, XML, etc
-        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
-        return JSON.stringify(this.events, null, 2);
+        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
     }
+
+    trimLeadingWhitespace(s: string) {
+        return s.replace(/^[ \t]+/gm, '');
+    }
+
+    serializeOneEvent(e: Event) {
+        return this.trimLeadingWhitespace(`
+            <${e.data?.intent || e.type}>
+            ${
+            typeof e.data !== 'object' ? e.data :
+            Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")}
+            </${e.data?.intent || e.type}>
+        `)
+    }
 }
```
To skip this step, copy the finished file instead:

    cp ./walkthrough/07b-agent.ts src/agent.ts
Let's try it out:

    BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result'

Let's update our tests to match the new output format:

```diff
baml_src/agent.baml
         {{ ctx.output_format }}
 
-        First, always plan out what to do next, for example:
+        Always think about what to do next first, like:
 
         - ...
   args {
     thread #"
-      {
-        "type": "user_input",
-        "data": "hello!"
-      }
+      <user_input>
+        hello!
+      </user_input>
     "#
   }
   args {
     thread #"
-      {
-        "type": "user_input",
-        "data": "can you multiply 3 and 4?"
-      }
+      <user_input>
+        can you multiply 3 and 4?
+      </user_input>
     "#
   }
   args {
     thread #"
-      [
-        {
-          "type": "user_input",
-          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
-        },
-        {
-          "type": "tool_call",
-          "data": {
-            "intent": "multiply",
-            "a": 3,
-            "b": 4
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 12
-        },
-        {
-          "type": "tool_call",
-          "data": {
-            "intent": "divide",
-            "a": 12,
-            "b": 2
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 6
-        },
-        {
-          "type": "tool_call",
-          "data": {
-            "intent": "add",
-            "a": 6,
-            "b": 12
-          }
-        },
-        {
-          "type": "tool_response",
-          "data": 18
-        }
-      ]
+      <user_input>
+        can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?
+      </user_input>
+
+
+      <multiply>
+        a: 3
+        b: 4
+      </multiply>
+
+
+      <tool_response>
+        12
+      </tool_response>
+
+
+      <divide>
+        a: 12
+        b: 2
+      </divide>
+
+
+      <tool_response>
+        6
+      </tool_response>
+
+
+      <add>
+        a: 6
+        b: 12
+      </add>
+
+
+      <tool_response>
+        18
+      </tool_response>
+
     "#
   }
   args {
     thread #"
-      [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
+      <user_input>
+        can you multiply 3 and fe1iiaff10
+      </user_input>
     "#
   }
   args {
     thread #"
-      [
-      {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
-      {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
-      {"type":"human_response","data":"lets try 12 instead"},
-      ]
+      <user_input>
+        can you multiply 3 and FD*(#F&& ?
+      </user_input>
+
+      <request_more_information>
+        message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
+      </request_more_information>
+
+      <human_response>
+        lets try 12 instead
+      </human_response>
     "#
   }
   @@assert(intent, {{this.intent == "multiply"}})
 }
```
To skip this step, copy the finished file instead:

    cp ./walkthrough/07c-agent.baml baml_src/agent.baml
check out the updated tests npx baml-cli test ================================================ FILE: workshops/2025-05/sections/07-context-window/baml_src/agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} First, always plan out what to do next, for example: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" 
}, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" [ {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}}, {"type":"human_response","data":"lets try 12 instead"}, ] "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(a, {{this.b == 12}}) @@assert(b, {{this.a == 3}}) } ================================================ FILE: workshops/2025-05/sections/07-context-window/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // 
https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/07-context-window/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/sections/07-context-window/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/07-context-window/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05/sections/07-context-window/src/agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/07-context-window/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { 
console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); let lastEvent = result.events.slice(-1)[0]; while (lastEvent.data.intent === "request_more_information") { const message = await askHuman(lastEvent.data.message); thread.events.push({ type: "human_response", data: message }); const result = await agentLoop(thread); lastEvent = result.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } async function askHuman(message: string) { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { resolve(answer); }); }); } ================================================ FILE: workshops/2025-05/sections/07-context-window/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/07-context-window/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": 
["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/07-context-window/walkthrough/07-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events, null, 2); } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case 
"add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/07-context-window/walkthrough/07b-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); 
thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/07-context-window/walkthrough/07c-agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? 
a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and fe1iiaff10 "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and FD*(#F&& ? message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? lets try 12 instead "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(b, {{this.a == 3}}) @@assert(a, {{this.b == 12}}) } ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/README.md ================================================ # Chapter 8 - Adding API Endpoints Add an Express server to expose the agent via HTTP. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Install Express and types npm install express && npm install --save-dev @types/express supertest Add the server implementation cp ./walkthrough/08-server.ts src/server.ts
show file ```ts // ./walkthrough/08-server.ts import express from 'express'; import { Thread, agentLoop } from '../src/agent'; const app = express(); app.use(express.json()); app.set('json spaces', 2); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const result = await agentLoop(thread); res.json(result); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { // optional - add state res.status(404).json({ error: "Not implemented yet" }); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ```
Start the server npx tsx src/server.ts Test with curl (in another terminal) curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you add 3 and 4"}' You should get an answer from the agent which includes the agentic trace, ending in a message like: {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."} ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? 
a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and fe1iiaff10 "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and FD*(#F&& ? message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? lets try 12 instead "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(b, {{this.a == 3}}) @@assert(a, {{this.b == 12}}) } ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 
// Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/src/agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? 
e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command 
line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); let lastEvent = result.events.slice(-1)[0]; while (lastEvent.data.intent === "request_more_information") { const message = await askHuman(lastEvent.data.message); thread.events.push({ type: "human_response", data: message }); const result = await agentLoop(thread); lastEvent = result.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } async function askHuman(message: string) { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { resolve(answer); }); }); } ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/08-api-endpoints/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } 
================================================ FILE: workshops/2025-05/sections/08-api-endpoints/walkthrough/08-server.ts ================================================ import express from 'express'; import { Thread, agentLoop } from '../src/agent'; const app = express(); app.use(express.json()); app.set('json spaces', 2); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const result = await agentLoop(thread); res.json(result); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { // optional - add state res.status(404).json({ error: "Not implemented yet" }); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/sections/09-state-management/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/09-state-management/README.md ================================================ # Chapter 9 - In-Memory State and Async Clarification Add state management and async clarification support. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Add some simple in-memory state management for threads cp ./walkthrough/09-state.ts src/state.ts
show file ```ts // ./walkthrough/09-state.ts import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. redis, sqlite, postgres, etc export class ThreadStore { private threads: Map<string, Thread> = new Map(); create(thread: Thread): string { const id = crypto.randomUUID(); this.threads.set(id, thread); return id; } get(id: string): Thread | undefined { return this.threads.get(id); } update(id: string, thread: Thread): void { this.threads.set(id, thread); } } ```
update the server to use the state management * Add thread state management using `ThreadStore` * return thread IDs and response URLs from the /thread endpoint * implement GET /thread/:id * implement POST /thread/:id/response ```diff src/server.ts import express from 'express'; import { Thread, agentLoop } from '../src/agent'; +import { ThreadStore } from '../src/state'; const app = express(); app.set('json spaces', 2); +const store = new ThreadStore(); + // POST /thread - Start new thread app.post('/thread', async (req, res) => { data: req.body.message }]); - const result = await agentLoop(thread); - res.json(result); + + const threadId = store.create(thread); + const newThread = await agentLoop(thread); + + store.update(threadId, newThread); + + const lastEvent = newThread.events[newThread.events.length - 1]; + // If we exited the loop, include the response URL so the client can + // push a new message onto the thread + lastEvent.data.response_url = `/thread/${threadId}/response`; + + console.log("returning last event from endpoint", lastEvent); + + res.json({ + thread_id: threadId, + ...newThread + }); }); app.get('/thread/:id', (req, res) => { - // optional - add state - res.status(404).json({ error: "Not implemented yet" }); + const thread = store.get(req.params.id); + if (!thread) { + return res.status(404).json({ error: "Thread not found" }); + } + res.json(thread); }); +// POST /thread/:id/response - Handle clarification response +app.post('/thread/:id/response', async (req, res) => { + let thread = store.get(req.params.id); + if (!thread) { + return res.status(404).json({ error: "Thread not found" }); + } + + thread.events.push({ + type: "human_response", + data: req.body.message + }); + + // loop until stop event + const newThread = await agentLoop(thread); + + store.update(req.params.id, newThread); + + const lastEvent = newThread.events[newThread.events.length - 1]; + lastEvent.data.response_url = `/thread/${req.params.id}/response`; + + 
console.log("returning last event from endpoint", lastEvent); + + res.json(newThread); +}); + const port = process.env.PORT || 3000; app.listen(port, () => { ```
skip this step cp ./walkthrough/09-server.ts src/server.ts
Start the server npx tsx src/server.ts Test clarification flow curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you multiply 3 and xyz"}' ================================================ FILE: workshops/2025-05/sections/09-state-management/baml_src/agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? 
a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and fe1iiaff10 "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and FD*(#F&& ? message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? lets try 12 instead "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(b, {{this.a == 3}}) @@assert(a, {{this.b == 12}}) } ================================================ FILE: workshops/2025-05/sections/09-state-management/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 
3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/09-state-management/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/sections/09-state-management/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/09-state-management/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "express": "^5.1.0", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/express": "^5.0.1", "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0", "supertest": "^7.1.0" } } ================================================ FILE: workshops/2025-05/sections/09-state-management/src/agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? 
e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/09-state-management/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a 
command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); let lastEvent = result.events.slice(-1)[0]; while (lastEvent.data.intent === "request_more_information") { const message = await askHuman(lastEvent.data.message); thread.events.push({ type: "human_response", data: message }); const result = await agentLoop(thread); lastEvent = result.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } async function askHuman(message: string) { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { resolve(answer); }); }); } ================================================ FILE: workshops/2025-05/sections/09-state-management/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/09-state-management/src/server.ts ================================================ import express from 'express'; import { Thread, agentLoop } from '../src/agent'; const app = express(); app.use(express.json()); app.set('json spaces', 2); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const result = await agentLoop(thread); res.json(result); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { // optional - add state 
res.status(404).json({ error: "Not implemented yet" }); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/sections/09-state-management/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/09-state-management/walkthrough/09-server.ts ================================================ import express from 'express'; import { Thread, agentLoop } from '../src/agent'; import { ThreadStore } from '../src/state'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return 
res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } thread.events.push({ type: "human_response", data: req.body.message }); // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(newThread); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/sections/09-state-management/walkthrough/09-state.ts ================================================ import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. redis, sqlite, postgres, etc export class ThreadStore { private threads: Map = new Map(); create(thread: Thread): string { const id = crypto.randomUUID(); this.threads.set(id, thread); return id; } get(id: string): Thread | undefined { return this.threads.get(id); } update(id: string, thread: Thread): void { this.threads.set(id, thread); } } ================================================ FILE: workshops/2025-05/sections/10-human-approval/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/10-human-approval/README.md ================================================ # Chapter 10 - Adding Human Approval Add support for human approval of operations. for this section, we'll disable the baml logs. 
You can optionally enable them if you want to see more details. export BAML_LOG=off update the server to handle human approvals * Import `handleNextStep` to execute approved actions * Add two payload types to distinguish approvals from responses * Handle responses and approvals differently in the endpoint * Show better error messages when things go wrong ```diff src/server.ts import express from 'express'; -import { Thread, agentLoop } from '../src/agent'; +import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; }); + +type ApprovalPayload = { + type: "approval"; + approved: boolean; + comment?: string; +} + +type ResponsePayload = { + type: "response"; + response: string; +} + +type Payload = ApprovalPayload | ResponsePayload; + // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { return res.status(404).json({ error: "Thread not found" }); } + + const body: Payload = req.body; + + let lastEvent = thread.events[thread.events.length - 1]; + + if (thread.awaitingHumanResponse() && body.type === 'response') { + thread.events.push({ + type: "human_response", + data: body.response + }); + } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { + // push feedback onto the thread + thread.events.push({ + type: "tool_response", + data: `user denied the operation with feedback: "${body.comment}"` + }); + } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { + // approved, run the tool, pushing results onto the thread + await handleNextStep(lastEvent.data, thread); + } else { + res.status(400).json({ + error: "Invalid request: " + body.type, + awaitingHumanResponse: thread.awaitingHumanResponse(), + awaitingHumanApproval: thread.awaitingHumanApproval() + }); + return; + } + - thread.events.push({ - type: "human_response", - data: req.body.message - }); - // loop until stop event const
newThread = await agentLoop(thread); store.update(req.params.id, newThread); - const lastEvent = newThread.events[newThread.events.length - 1]; + lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; ```
skip this step cp ./walkthrough/10-server.ts src/server.ts
Add a few methods to the agent to handle approvals and responses ```diff src/agent.ts `) } + + awaitingHumanResponse(): boolean { + const lastEvent = this.events[this.events.length - 1]; + return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent); + } + + awaitingHumanApproval(): boolean { + const lastEvent = this.events[this.events.length - 1]; + return lastEvent.data.intent === 'divide'; + } } // response to human, return the thread return thread; + case "divide": + // divide is scary, return it for human approval + return thread; case "add": case "subtract": case "multiply": - case "divide": thread = await handleNextStep(nextStep, thread); } ```
skip this step cp ./walkthrough/10-agent.ts src/agent.ts
Start the server npx tsx src/server.ts Test division with approval curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you divide 3 by 4"}' You should see: { "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b", "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } } ] } reject the request with another curl call, changing the thread ID curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}' You should see: the last tool call is now `"intent":"divide","a":3,"b":5` { "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } }, { "type": "tool_response", "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\"" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 5, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } } ] } now you can approve the operation curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": true}' you should see the final message includes the tool response and final result! ... { "type": "tool_response", "data": 0.6 }, { "type": "done_for_now", "message": "I divided 3 by 5 and the result is 0.6.
If you have any more operations or queries, feel free to ask!", "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response" } ================================================ FILE: workshops/2025-05/sections/10-human-approval/baml_src/agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and fe1iiaff10 "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and FD*(#F&& ? 
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? lets try 12 instead "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(b, {{this.a == 3}}) @@assert(a, {{this.b == 12}}) } ================================================ FILE: workshops/2025-05/sections/10-human-approval/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/10-human-approval/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use 
in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). default_client_mode async } ================================================ FILE: workshops/2025-05/sections/10-human-approval/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/10-human-approval/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "express": "^5.1.0", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/express": "^5.0.1", "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0", "supertest": "^7.1.0" } } ================================================ FILE: workshops/2025-05/sections/10-human-approval/src/agent.ts ================================================ import { AddTool, SubtractTool, 
DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } 
================================================
FILE: workshops/2025-05/sections/10-human-approval/src/cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point.
 *
 * Joins the command line arguments into one message, seeds a Thread with it,
 * and runs the agent loop. While the last event is a
 * "request_more_information" tool call, the question is put to the user on
 * stdin and the loop is resumed with the answer. Prints the message of the
 * final event and exits the process.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    let lastEvent = result.events.slice(-1)[0];

    while (lastEvent.data.intent === "request_more_information") {
        const message = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: message });
        const result = await agentLoop(thread);
        lastEvent = result.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Prompt the user on the terminal and resolve with the line they type.
 *
 * @param message question to display before the "> " prompt
 * @returns the answer entered by the user
 */
async function askHuman(message: string): Promise<string> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // A new interface is created on every call; close it once the
            // answer arrives so repeated clarification rounds do not leave
            // several interfaces attached to stdin.
            readline.close();
            resolve(answer);
        });
    });
}

================================================
FILE: workshops/2025-05/sections/10-human-approval/src/index.ts
================================================
import { cli } from "./cli"

// Leftover from the hello-world chapter; not called by main().
async function hello(): Promise<void> {
    console.log('hello, world!')
}

// Program entry point: delegate to the CLI.
async function main() {
    await cli()
}

main().catch(console.error)

================================================
FILE: workshops/2025-05/sections/10-human-approval/src/server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// Pull the `message` field out of a parsed request body.
// Returns undefined when the body is missing (req.body may be undefined when
// no JSON body was parsed) or when `message` is not a string.
function readMessage(body: any): string | undefined {
    return typeof body?.message === "string" ? body.message : undefined;
}

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    const message = readMessage(req.body);
    if (message === undefined) {
        // Reject malformed requests instead of throwing on req.body.message
        res.status(400).json({ error: "Request body must be JSON with a string \"message\" field" });
        return;
    }

    const thread = new Thread([{
        type: "user_input",
        data: message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        // send-then-return keeps the handler's return type void
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});

// POST /thread/:id/response - Handle clarification response
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    const message = readMessage(req.body);
    if (message === undefined) {
        res.status(400).json({ error: "Request body must be JSON with a string \"message\" field" });
        return;
    }

    thread.events.push({
        type: "human_response",
        data: message
    });

    // loop until stop event
    const newThread = await agentLoop(thread);

    store.update(req.params.id, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(newThread);
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================
FILE: workshops/2025-05/sections/10-human-approval/src/state.ts
================================================ import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. redis, sqlite, postgres, etc export class ThreadStore { private threads: Map = new Map(); create(thread: Thread): string { const id = crypto.randomUUID(); this.threads.set(id, thread); return id; } get(id: string): Thread | undefined { return this.threads.get(id); } update(id: string, thread: Thread): void { this.threads.set(id, thread); } } ================================================ FILE: workshops/2025-05/sections/10-human-approval/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/10-human-approval/walkthrough/10-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? 
e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) } awaitingHumanResponse(): boolean { const lastEvent = this.events[this.events.length - 1]; return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent); } awaitingHumanApproval(): boolean { const lastEvent = this.events[this.events.length - 1]; return lastEvent.data.intent === 'divide'; } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "divide": // divide is scary, return it for human approval return thread; case "add": case "subtract": case "multiply": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/10-human-approval/walkthrough/10-server.ts 
================================================ import express from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type ApprovalPayload = { type: "approval"; approved: boolean; comment?: string; } type ResponsePayload = { type: "response"; response: string; } type Payload = ApprovalPayload | ResponsePayload; // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } const body: Payload = req.body; let lastEvent = thread.events[thread.events.length - 1]; if (thread.awaitingHumanResponse() && body.type === 'response') { thread.events.push({ type: "human_response", data: body.response }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { // push feedback onto the thread thread.events.push({ type: "tool_response", data: `user denied the operation with 
feedback: "${body.comment}"` }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { // approved, run the tool, pushing results onto the thread await handleNextStep(lastEvent.data, thread); } else { res.status(400).json({ error: "Invalid request: " + body.type, awaitingHumanResponse: thread.awaitingHumanResponse(), awaitingHumanApproval: thread.awaitingHumanApproval() }); return; } // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(newThread); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/README.md ================================================ # Chapter 11 - Human Approvals over email in this section, we'll add support for human approvals over email. This will start a little bit contrived, just to get the concepts down - We'll start by invoking the workflow from the CLI but approvals for `divide` and `request_more_information` will be handled over email, then the final `done_for_now` answer will be printed back to the CLI While contrived, this is a great example of the flexibility you get from [factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md) for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. 
export BAML_LOG=off Install HumanLayer npm install humanlayer Update CLI to send `divide` and `request_more_information` to a human via email ```diff src/cli.ts // cli.ts lets you invoke the agent loop from the command line +import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event } from "../src/agent"; - - export async function cli() { // Get command line arguments, skipping the first two (node and script name) // Run the agent loop with the thread - const result = await agentLoop(thread); - let lastEvent = result.events.slice(-1)[0]; + let newThread = await agentLoop(thread); + let lastEvent = newThread.events.slice(-1)[0]; - while (lastEvent.data.intent === "request_more_information") { - const message = await askHuman(lastEvent.data.message); - thread.events.push({ type: "human_response", data: message }); - const result = await agentLoop(thread); - lastEvent = result.events.slice(-1)[0]; + while (lastEvent.data.intent !== "done_for_now") { + const responseEvent = await askHuman(lastEvent); + thread.events.push(responseEvent); + newThread = await agentLoop(thread); + lastEvent = newThread.events.slice(-1)[0]; } // print the final result console.log(lastEvent.data.message); process.exit(0); } -async function askHuman(message: string) { +async function askHuman(lastEvent: Event): Promise { + if (process.env.HUMANLAYER_API_KEY) { + return await askHumanEmail(lastEvent); + } else { + return await askHumanCLI(lastEvent.data.message); + } +} + +async function askHumanCLI(message: string): Promise { const readline = require('readline').createInterface({ input: process.stdin, return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { - resolve(answer); + resolve({ type: "human_response", data: answer }); }); }); } + +export async function askHumanEmail(lastEvent: Event): Promise { + if (!process.env.HUMANLAYER_EMAIL) { + throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); + } + const hl = humanlayer({ 
//reads apiKey from env + // name of this agent + runId: "12fa-cli-agent", + verbose: true, + contactChannel: { + // agent should request permission via email + email: { + address: process.env.HUMANLAYER_EMAIL, + } + } + }) + + if (lastEvent.data.intent === "divide") { + // fetch approval synchronously - this will block until reply + const response = await hl.fetchHumanApproval({ + spec: { + fn: "divide", + kwargs: { + a: lastEvent.data.a, + b: lastEvent.data.b + } + } + }) + + if (response.approved) { + const result = lastEvent.data.a / lastEvent.data.b; + console.log("tool_response", result); + return { + "type": "tool_response", + "data": result + }; + } else { + return { + "type": "tool_response", + "data": `user denied operation ${lastEvent.data.intent} + with feedback: ${response.comment}` + }; + } + } + throw new Error(`unknown tool: ${lastEvent.data.intent}`) +} ```
skip this step cp ./walkthrough/11-cli.ts src/cli.ts
Run the CLI npx tsx src/index.ts 'can you divide 4 by 5' The last line of your program should mention a human review step nextStep { intent: 'divide', a: 4, b: 5 } HumanLayer: Requested human approval from HumanLayer cloud go ahead and respond to the email with some feedback: ![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true) you should get another email with an updated attempt based on your feedback! You can go ahead and approve this one: ![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true) and your final output will look like nextStep { intent: 'done_for_now', message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!' } The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask! let's implement the `request_more_information` flow as well ```diff src/cli.ts }) + if (lastEvent.data.intent === "request_more_information") { + // fetch response synchronously - this will block until reply + const response = await hl.fetchHumanResponse({ + spec: { + msg: lastEvent.data.message + } + }) + return { + "type": "tool_response", + "data": response + } + } + if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply ```
skip this step cp ./walkthrough/11b-cli.ts src/cli.ts
let's test the `request_more_information` flow by asking for a calculation with garbled input: npx tsx src/index.ts 'can you multiply 4 and xyz' You should get an email with a request for clarification Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else? you can respond with something like use 8 instead of xyz you should see a final result on the CLI like I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32. as a final step, let's explore using a custom html template for the email ```diff src/cli.ts email: { address: process.env.HUMANLAYER_EMAIL, + // custom email body - jinja + template: `{% if type == 'request_more_information' %} +{{ event.spec.msg }} +{% else %} +agent {{ event.run_id }} is requesting approval for {{event.spec.fn}} +with args: {{event.spec.kwargs}} +

+reply to this email to approve +{% endif %}` } } ```
skip this step cp ./walkthrough/11c-cli.ts src/cli.ts
first try with divide: npx tsx src/index.ts 'can you divide 4 by 5' you should see a slightly different email with the custom template ![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true) feel free to run with the flow and then you can try updating the template to your liking (if you're using cursor, something as simple as highlighting the template and asking to "make it better" should do the trick) try triggering "request_more_information" as well! thats it - in the next chapter, we'll build a fully email-driven workflow agent that uses webhooks for human approval ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? 
"# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and fe1iiaff10 "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and FD*(#F&& ? message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? lets try 12 instead "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(b, {{this.a == 3}}) @@assert(a, {{this.b == 12}}) } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback 
options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "express": "^5.1.0", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/express": "^5.0.1", "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0", "supertest": "^7.1.0" } } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/src/agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? 
e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) } awaitingHumanResponse(): boolean { const lastEvent = this.events[this.events.length - 1]; return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent); } awaitingHumanApproval(): boolean { const lastEvent = this.events[this.events.length - 1]; return lastEvent.data.intent === 'divide'; } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "divide": // divide is scary, return it for human approval return thread; case "add": case "subtract": case "multiply": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/src/cli.ts 
================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); let lastEvent = result.events.slice(-1)[0]; while (lastEvent.data.intent === "request_more_information") { const message = await askHuman(lastEvent.data.message); thread.events.push({ type: "human_response", data: message }); const result = await agentLoop(thread); lastEvent = result.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } /** Prints `message` as a prompt on stdout and resolves with the line the user types on stdin. */ async function askHuman(message: string): Promise<string> { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise<string>((resolve) => { readline.question(`${message}\n> `, (answer: string) => { /* close the interface once answered so repeated prompts do not stack stdin listeners */ readline.close(); resolve(answer); }); }); } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise<void> { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/src/server.ts ================================================ import express from 'express'; import { Thread, agentLoop, handleNextStep } from 
'../src/agent'; import { ThreadStore } from '../src/state'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type ApprovalPayload = { type: "approval"; approved: boolean; comment?: string; } type ResponsePayload = { type: "response"; response: string; } type Payload = ApprovalPayload | ResponsePayload; // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } const body: Payload = req.body; let lastEvent = thread.events[thread.events.length - 1]; if (thread.awaitingHumanResponse() && body.type === 'response') { thread.events.push({ type: "human_response", data: body.response }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { // push feedback onto the thread thread.events.push({ type: "tool_response", data: `user denied the operation with feedback: "${body.comment}"` }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { // approved, 
run the tool, pushing results onto the thread await handleNextStep(lastEvent.data, thread); } else { res.status(400).json({ error: "Invalid request: " + body.type, awaitingHumanResponse: thread.awaitingHumanResponse(), awaitingHumanApproval: thread.awaitingHumanApproval() }); return; } // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(newThread); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/src/state.ts ================================================ import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. 
redis, sqlite, postgres, etc export class ThreadStore { /** thread id -> Thread, held only in this in-process Map */ private threads: Map<string, Thread> = new Map(); create(thread: Thread): string { const id = crypto.randomUUID(); this.threads.set(id, thread); return id; } get(id: string): Thread | undefined { return this.threads.get(id); } update(id: string, thread: Thread): void { this.threads.set(id, thread); } } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/walkthrough/11-cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread let newThread = await agentLoop(thread); let lastEvent = newThread.events.slice(-1)[0]; while (lastEvent.data.intent !== "done_for_now") { const responseEvent = await 
askHuman(lastEvent); thread.events.push(responseEvent); newThread = await agentLoop(thread); lastEvent = newThread.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } async function askHuman(lastEvent: Event): Promise { if (process.env.HUMANLAYER_API_KEY) { return await askHumanEmail(lastEvent); } else { return await askHumanCLI(lastEvent.data.message); } } async function askHumanCLI(message: string): Promise { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { resolve({ type: "human_response", data: answer }); }); }); } export async function askHumanEmail(lastEvent: Event): Promise { if (!process.env.HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const hl = humanlayer({ //reads apiKey from env // name of this agent runId: "12fa-cli-agent", verbose: true, contactChannel: { // agent should request permission via email email: { address: process.env.HUMANLAYER_EMAIL, } } }) if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply const response = await hl.fetchHumanApproval({ spec: { fn: "divide", kwargs: { a: lastEvent.data.a, b: lastEvent.data.b } } }) if (response.approved) { const result = lastEvent.data.a / lastEvent.data.b; console.log("tool_response", result); return { "type": "tool_response", "data": result }; } else { return { "type": "tool_response", "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}` }; } } throw new Error(`unknown tool: ${lastEvent.data.intent}`) } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/walkthrough/11b-cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command 
line import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread let newThread = await agentLoop(thread); let lastEvent = newThread.events.slice(-1)[0]; while (lastEvent.data.intent !== "done_for_now") { const responseEvent = await askHuman(lastEvent); thread.events.push(responseEvent); newThread = await agentLoop(thread); lastEvent = newThread.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } async function askHuman(lastEvent: Event): Promise { if (process.env.HUMANLAYER_API_KEY) { return await askHumanEmail(lastEvent); } else { return await askHumanCLI(lastEvent.data.message); } } async function askHumanCLI(message: string): Promise { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { resolve({ type: "human_response", data: answer }); }); }); } export async function askHumanEmail(lastEvent: Event): Promise { if (!process.env.HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const hl = humanlayer({ //reads apiKey from env // name of this agent runId: "12fa-cli-agent", verbose: true, contactChannel: { // agent should request permission via email email: { address: process.env.HUMANLAYER_EMAIL, } } }) if (lastEvent.data.intent === 
"request_more_information") { // fetch response synchronously - this will block until reply const response = await hl.fetchHumanResponse({ spec: { msg: lastEvent.data.message } }) return { "type": "tool_response", "data": response } } if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply const response = await hl.fetchHumanApproval({ spec: { fn: "divide", kwargs: { a: lastEvent.data.a, b: lastEvent.data.b } } }) if (response.approved) { const result = lastEvent.data.a / lastEvent.data.b; console.log("tool_response", result); return { "type": "tool_response", "data": result }; } else { return { "type": "tool_response", "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}` }; } } throw new Error(`unknown tool: ${lastEvent.data.intent}`) } ================================================ FILE: workshops/2025-05/sections/11-humanlayer-approval/walkthrough/11c-cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread let newThread = await agentLoop(thread); let lastEvent = newThread.events.slice(-1)[0]; while (lastEvent.data.intent !== "done_for_now") { const responseEvent = await askHuman(lastEvent); thread.events.push(responseEvent); newThread = await agentLoop(thread); lastEvent = newThread.events.slice(-1)[0]; } // 
print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } async function askHuman(lastEvent: Event): Promise { if (process.env.HUMANLAYER_API_KEY) { return await askHumanEmail(lastEvent); } else { return await askHumanCLI(lastEvent.data.message); } } async function askHumanCLI(message: string): Promise { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { resolve({ type: "human_response", data: answer }); }); }); } export async function askHumanEmail(lastEvent: Event): Promise { if (!process.env.HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const hl = humanlayer({ //reads apiKey from env // name of this agent runId: "12fa-cli-agent", verbose: true, contactChannel: { // agent should request permission via email email: { address: process.env.HUMANLAYER_EMAIL, // custom email body - jinja template: `{% if type == 'request_more_information' %} {{ event.spec.msg }} {% else %} agent {{ event.run_id }} is requesting approval for {{event.spec.fn}} with args: {{event.spec.kwargs}}

reply to this email to approve {% endif %}` } } }) if (lastEvent.data.intent === "request_more_information") { // fetch response synchronously - this will block until reply const response = await hl.fetchHumanResponse({ spec: { msg: lastEvent.data.message } }) return { "type": "tool_response", "data": response } } if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply const response = await hl.fetchHumanApproval({ spec: { fn: "divide", kwargs: { a: lastEvent.data.a, b: lastEvent.data.b } } }) if (response.approved) { const result = lastEvent.data.a / lastEvent.data.b; console.log("tool_response", result); return { "type": "tool_response", "data": result }; } else { return { "type": "tool_response", "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}` }; } } throw new Error(`unknown tool: ${lastEvent.data.intent}`) } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/README.md ================================================ # Chapter 12 - HumanLayer Webhook Integration The previous sections used the humanlayer SDK in "synchronous mode" - that means every time we wait for human approval, we sit in a loop polling until the human response is received. That's obviously not ideal, especially for production workloads, so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md) by updating the server to end processing after contacting a human, and using webhooks to receive the results. 
add code to initialize humanlayer in the server ```diff src/server.ts import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; +import { humanlayer } from 'humanlayer'; const app = express(); const store = new ThreadStore(); +const getHumanlayer = () => { + const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; + if (!HUMANLAYER_EMAIL) { + throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); + } + + const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; + if (!HUMANLAYER_API_KEY) { + throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); + } + return humanlayer({ + runId: `12fa-agent`, + contactChannel: { + email: { address: HUMANLAYER_EMAIL } + } + }); +} + // POST /thread - Start new thread app.post('/thread', async (req, res) => { // loop until stop event - const newThread = await agentLoop(thread); + const result = await agentLoop(thread); - store.update(req.params.id, newThread); + store.update(req.params.id, result); - lastEvent = newThread.events[newThread.events.length - 1]; + lastEvent = result.events[result.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); - res.json(newThread); + res.json(result); }); ```
skip this step cp ./walkthrough/12-1-server-init.ts src/server.ts
Next, let's update the /thread endpoint to 1. handle requests asynchronously, returning immediately 2. create a human contact on request_more_information and done_for_now calls Update the server to be able to handle request_more_information responses - remove the old /response endpoint and types - update the /thread endpoint to run processing asynchronously, return immediately - send a state.thread_id when requesting human responses - add a handleHumanResponse function to process the human response - add a /webhook endpoint to handle the webhook response ```diff src/server.ts -import express from 'express'; +import express, { Request, Response } from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; -import { humanlayer } from 'humanlayer'; +import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer'; const app = express(); }); } - // POST /thread - Start new thread -app.post('/thread', async (req, res) => { +app.post('/thread', async (req: Request, res: Response) => { const thread = new Thread([{ type: "user_input", }]); - const threadId = store.create(thread); - const newThread = await agentLoop(thread); - - store.update(threadId, newThread); + // run agent loop asynchronously, return immediately + Promise.resolve().then(async () => { + const threadId = store.create(thread); + const newThread = await agentLoop(thread); + + store.update(threadId, newThread); - const lastEvent = newThread.events[newThread.events.length - 1]; - // If we exited the loop, include the response URL so the client can - // push a new message onto the thread - lastEvent.data.response_url = `/thread/${threadId}/response`; + const lastEvent = newThread.events[newThread.events.length - 1]; - console.log("returning last event from endpoint", lastEvent); - - res.json({ - thread_id: threadId, - ...newThread + if (thread.awaitingHumanResponse()) { + const hl = getHumanlayer(); + // create a human contact - returns 
immediately + hl.createHumanContact({ + spec: { + msg: lastEvent.data.message, + state: { + thread_id: threadId, + } + } + }); + } }); + + res.json({ status: "processing" }); }); // GET /thread/:id - Get thread status -app.get('/thread/:id', (req, res) => { +app.get('/thread/:id', (req: Request, res: Response) => { const thread = store.get(req.params.id); if (!thread) { }); +type WebhookResponse = V1Beta2HumanContactCompleted; -type ApprovalPayload = { - type: "approval"; - approved: boolean; - comment?: string; -} +const handleHumanResponse = async (req: Request, res: Response) => { -type ResponsePayload = { - type: "response"; - response: string; } -type Payload = ApprovalPayload | ResponsePayload; +app.post('/webhook', async (req: Request, res: Response) => { + console.log("webhook response", req.body); + const response = req.body as WebhookResponse; -// POST /thread/:id/response - Handle clarification response -app.post('/thread/:id/response', async (req, res) => { - let thread = store.get(req.params.id); + // response is guaranteed to be set on a webhook + const humanResponse: string = response.event.status?.response as string; + + const threadId = response.event.spec.state?.thread_id; + if (!threadId) { + return res.status(400).json({ error: "Thread ID not found" }); + } + + const thread = store.get(threadId); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } - const body: Payload = req.body; - - let lastEvent = thread.events[thread.events.length - 1]; - - if (thread.awaitingHumanResponse() && body.type === 'response') { - thread.events.push({ - type: "human_response", - data: body.response - }); - } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { - // push feedback onto the thread - thread.events.push({ - type: "tool_response", - data: `user denied the operation with feedback: "${body.comment}"` - }); - } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { 
- // approved, run the tool, pushing results onto the thread - await handleNextStep(lastEvent.data, thread); - } else { - res.status(400).json({ - error: "Invalid request: " + body.type, - awaitingHumanResponse: thread.awaitingHumanResponse(), - awaitingHumanApproval: thread.awaitingHumanApproval() - }); - return; + if (!thread.awaitingHumanResponse()) { + return res.status(400).json({ error: "Thread is not awaiting human response" }); } - - // loop until stop event - const result = await agentLoop(thread); - - store.update(req.params.id, result); - - lastEvent = result.events[result.events.length - 1]; - lastEvent.data.response_url = `/thread/${req.params.id}/response`; - - console.log("returning last event from endpoint", lastEvent); - - res.json(result); }); ```
skip this step cp ./walkthrough/12a-server.ts src/server.ts
Start the server in another terminal npx tsx src/server.ts now that the server is running, send a payload to the '/thread' endpoint __ do the response step __ now handle approvals for divide __ now also handle done_for_now ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? 
a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and fe1iiaff10 "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and FD*(#F&& ? message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? lets try 12 instead "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(a, {{this.a == 3}}) @@assert(b, {{this.b == 12}}) } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client<llm> CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client<llm> CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client<llm> CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client<llm> CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client<llm> CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client<llm> OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { 
max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/generators.baml ================================================ // This helps us auto-generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.85.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). 
default_client_mode async } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "baml": "^0.0.0", "express": "^5.1.0", "humanlayer": "^0.7.7", "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/express": "^5.0.1", "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0", "supertest": "^7.1.0" } } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? 
e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) } /** Reports whether the latest event is an agent message addressed to the human (a clarification request or a final answer). */ awaitingHumanResponse(): boolean { const lastEvent = this.events[this.events.length - 1]; /* optional chaining: an empty thread or a null payload yields false instead of throwing */ return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent); } /** Reports whether the latest event is a `divide` tool call, the one calculator intent agentLoop hands back instead of executing. */ awaitingHumanApproval(): boolean { const lastEvent = this.events[this.events.length - 1]; return lastEvent?.data?.intent === 'divide'; } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; /** Runs one calculator tool call, pushes the numeric result onto the thread as a `tool_response` event, and resolves with that same thread. */ export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } /** Asks the LLM for the next step, records it as a `tool_call` event, and keeps looping through add/subtract/multiply; resolves with the thread once the step is done_for_now, request_more_information or divide. */ export async function agentLoop(thread: Thread): Promise<Thread> { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "divide": // divide is scary, return it for human approval return thread; case "add": case "subtract": case "multiply": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/cli.ts 
================================================ // cli.ts lets you invoke the agent loop from the command line import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread let newThread = await agentLoop(thread); let lastEvent = newThread.events.slice(-1)[0]; while (lastEvent.data.intent !== "done_for_now") { const responseEvent = await askHuman(lastEvent); thread.events.push(responseEvent); newThread = await agentLoop(thread); lastEvent = newThread.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } async function askHuman(lastEvent: Event): Promise { if (process.env.HUMANLAYER_API_KEY) { return await askHumanEmail(lastEvent); } else { return await askHumanCLI(lastEvent.data.message); } } async function askHumanCLI(message: string): Promise { const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout }); return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { resolve({ type: "human_response", data: answer }); }); }); } export async function askHumanEmail(lastEvent: Event): Promise { if (!process.env.HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const hl = humanlayer({ //reads apiKey from env // name of this agent runId: "12fa-cli-agent", verbose: true, contactChannel: { // agent should request permission via email 
email: { address: process.env.HUMANLAYER_EMAIL, // custom email body - jinja template: `{% if type == 'request_more_information' %} {{ event.spec.msg }} {% else %} agent {{ event.run_id }} is requesting approval for {{event.spec.fn}} with args: {{event.spec.kwargs}}

reply to this email to approve {% endif %}` } } }) if (lastEvent.data.intent === "request_more_information") { // fetch response synchronously - this will block until reply const response = await hl.fetchHumanResponse({ spec: { msg: lastEvent.data.message } }) return { "type": "tool_response", "data": response } } if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply const response = await hl.fetchHumanApproval({ spec: { fn: "divide", kwargs: { a: lastEvent.data.a, b: lastEvent.data.b } } }) if (response.approved) { const result = lastEvent.data.a / lastEvent.data.b; console.log("tool_response", result); return { "type": "tool_response", "data": result }; } else { return { "type": "tool_response", "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}` }; } } throw new Error(`unknown tool: ${lastEvent.data.intent}`) } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/server.ts ================================================ import express from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If 
we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type ApprovalPayload = { type: "approval"; approved: boolean; comment?: string; } type ResponsePayload = { type: "response"; response: string; } type Payload = ApprovalPayload | ResponsePayload; // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } const body: Payload = req.body; let lastEvent = thread.events[thread.events.length - 1]; if (thread.awaitingHumanResponse() && body.type === 'response') { thread.events.push({ type: "human_response", data: body.response }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { // push feedback onto the thread thread.events.push({ type: "tool_response", data: `user denied the operation with feedback: "${body.comment}"` }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { // approved, run the tool, pushing results onto the thread await handleNextStep(lastEvent.data, thread); } else { res.status(400).json({ error: "Invalid request: " + body.type, awaitingHumanResponse: thread.awaitingHumanResponse(), awaitingHumanApproval: thread.awaitingHumanApproval() }); return; } // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = 
`/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(newThread); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/src/state.ts ================================================ import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. redis, sqlite, postgres, etc export class ThreadStore { private threads: Map = new Map(); create(thread: Thread): string { const id = crypto.randomUUID(); this.threads.set(id, thread); return id; } get(id: string): Thread | undefined { return this.threads.get(id); } update(id: string, thread: Thread): void { this.threads.set(id, thread); } } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/walkthrough/12-1-server-init.ts ================================================ import express from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; import { humanlayer } from 'humanlayer'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new 
ThreadStore(); const getHumanlayer = () => { const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; if (!HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; if (!HUMANLAYER_API_KEY) { throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); } return humanlayer({ runId: `12fa-agent`, contactChannel: { email: { address: HUMANLAYER_EMAIL } } }); } // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type ApprovalPayload = { type: "approval"; approved: boolean; comment?: string; } type ResponsePayload = { type: "response"; response: string; } type Payload = ApprovalPayload | ResponsePayload; // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } const body: Payload = req.body; let lastEvent = thread.events[thread.events.length - 1]; if (thread.awaitingHumanResponse() && body.type === 'response') { thread.events.push({ type: "human_response", data: body.response }); } else if (thread.awaitingHumanApproval() && 
body.type === 'approval' && !body.approved) { // push feedback onto the thread thread.events.push({ type: "tool_response", data: `user denied the operation with feedback: "${body.comment}"` }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { // approved, run the tool, pushing results onto the thread await handleNextStep(lastEvent.data, thread); } else { res.status(400).json({ error: "Invalid request: " + body.type, awaitingHumanResponse: thread.awaitingHumanResponse(), awaitingHumanApproval: thread.awaitingHumanApproval() }); return; } // loop until stop event const result = await agentLoop(thread); store.update(req.params.id, result); lastEvent = result.events[result.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(result); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/sections/12-humanlayer-webhook/walkthrough/12a-server.ts ================================================ import express, { Request, Response } from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); const getHumanlayer = () => { const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; if (!HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; if (!HUMANLAYER_API_KEY) { throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); } return humanlayer({ runId: `12fa-agent`, contactChannel: { email: { address: HUMANLAYER_EMAIL } } }); } // POST 
/thread - Start new thread app.post('/thread', async (req: Request, res: Response) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); // run agent loop asynchronously, return immediately Promise.resolve().then(async () => { const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; if (thread.awaitingHumanResponse()) { const hl = getHumanlayer(); // create a human contact - returns immediately hl.createHumanContact({ spec: { msg: lastEvent.data.message, state: { thread_id: threadId, } } }); } }); res.json({ status: "processing" }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req: Request, res: Response) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type WebhookResponse = V1Beta2HumanContactCompleted; const handleHumanResponse = async (req: Request, res: Response) => { } app.post('/webhook', async (req: Request, res: Response) => { console.log("webhook response", req.body); const response = req.body as WebhookResponse; // response is guaranteed to be set on a webhook const humanResponse: string = response.event.status?.response as string; const threadId = response.event.spec.state?.thread_id; if (!threadId) { return res.status(400).json({ error: "Thread ID not found" }); } const thread = store.get(threadId); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } if (!thread.awaitingHumanResponse()) { return res.status(400).json({ error: "Thread is not awaiting human response" }); } }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/sections/final/.gitignore ================================================ baml_client/ 
node_modules/ ================================================ FILE: workshops/2025-05/sections/final/README.md ================================================ # Chapter 0 - Hello World Let's start with a basic TypeScript setup and a hello world program. This guide is written in TypeScript (yes, a python version is coming soon) There are many checkpoints between every file edit in the workshop steps, so even if you aren't super familiar with typescript, you should be able to keep up and run each example. To run this guide, you'll need a relatively recent version of nodejs and npm installed You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine brew install node@20 You should see the node version node --version Copy initial package.json cp ./walkthrough/00-package.json package.json Install dependencies npm install Copy tsconfig.json cp ./walkthrough/00-tsconfig.json tsconfig.json add .gitignore cp ./walkthrough/00-.gitignore .gitignore Create src folder mkdir -p src Add a simple hello world index.ts cp ./walkthrough/00-index.ts src/index.ts Run it to verify npx tsx src/index.ts You should see: hello, world! # Chapter 1 - CLI and Agent Loop Now let's add BAML and create our first agent with a CLI interface. First, we'll need to install [BAML](https://github.com/boundaryml/baml) which is a tool for prompting and structured outputs.
npm install @boundaryml/baml Initialize BAML npx baml-cli init Remove default resume.baml rm baml_src/resume.baml Add our starter agent, a single baml prompt that we'll build on cp ./walkthrough/01-agent.baml baml_src/agent.baml Generate BAML client code npx baml-cli generate Enable BAML logging for this section export BAML_LOG=debug Add the CLI interface cp ./walkthrough/01-cli.ts src/cli.ts Update index.ts to use the CLI cp ./walkthrough/01-index.ts src/index.ts Add the agent implementation cp ./walkthrough/01-agent.ts src/agent.ts The BAML code is configured to use OPENAI_API_KEY by default As you're testing, you can change the model / provider to something else as you please client "openai/gpt-4o" [Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms) For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider. If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key. export OPENAI_API_KEY=... Try it out npx tsx src/index.ts hello you should see a familiar response from the model { intent: 'done_for_now', message: 'Hello! How can I assist you today?' } # Chapter 2 - Add Calculator Tools Let's add some calculator tools to our agent. Let's start by adding a tool definition for the calculator These are simple structured outputs that we'll ask the model to return as a "next step" in the agentic loop.
cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps cp ./walkthrough/02-agent.baml baml_src/agent.baml Generate updated BAML client npx baml-cli generate Try out the calculator npx tsx src/index.ts 'can you add 3 and 4' You should see a tool call to the calculator { intent: 'add', a: 3, b: 4 } # Chapter 3 - Process Tool Calls in a Loop Now let's add a real agentic loop that can run the tools and get a final answer from the LLM. First, let's update the agent to handle the tool call cp ./walkthrough/03-agent.ts src/agent.ts Now, let's try it out npx tsx src/index.ts 'can you add 3 and 4' you should see the agent call the tool and then return the result { intent: 'done_for_now', message: 'The sum of 3 and 4 is 7.' } For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output export BAML_LOG=off Try a multi-step calculation npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result' you'll notice that tools like multiply and divide are not available npx tsx src/index.ts 'can you multiply 3 and 4' next, let's add handlers for the rest of the calculator tools cp ./walkthrough/03b-agent.ts src/agent.ts Test subtraction npx tsx src/index.ts 'can you subtract 3 from 4' now, let's test the multiplication tool npx tsx src/index.ts 'can you multiply 3 and 4' finally, let's test a more complex calculation with multiple operations npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' # Chapter 4 - Add Tests to agent.baml Let's add some tests to our BAML agent. to start, leave the baml logs enabled export BAML_LOG=debug next, let's add some tests to the agent We'll start with a simple test that checks the agent's ability to handle a basic calculation.
cp ./walkthrough/04-agent.baml baml_src/agent.baml Run the tests npx baml-cli test now, let's improve the test with assertions! Assertions are a great way to make sure the agent is working as expected, and can easily be extended to check for more complex behavior. cp ./walkthrough/04b-agent.baml baml_src/agent.baml Run the tests npx baml-cli test as you add more tests, you can disable the logs to keep the output clean. You may want to turn them on as you iterate on specific tests. export BAML_LOG=off now, let's add some more complex test cases, where we resume from in the middle of an in-progress agentic context window cp ./walkthrough/04c-agent.baml baml_src/agent.baml let's try to run it npx baml-cli test # Chapter 5 - Multiple Human Tools In this section, we'll add support for multiple tools that serve to contact humans. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off first, let's add a tool that can request clarification from a human this will be different from the "done_for_now" tool, and can be used to more flexibly handle different types of human interactions in your agent. cp ./walkthrough/05-agent.baml baml_src/agent.baml next, let's re-generate the client code NOTE - if you're using the VSCode extension for BAML, the client will be regenerated automatically when you save the file in your editor. 
npx baml-cli generate now, let's update the agent to use the new tool cp ./walkthrough/05-agent.ts src/agent.ts next, let's update the CLI to handle clarification requests by requesting input from the user on the CLI cp ./walkthrough/05-cli.ts src/cli.ts let's try it out npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& ' next, let's add a test that checks the agent's ability to handle a clarification request cp ./walkthrough/05b-agent.baml baml_src/agent.baml and now we can run the tests again npx baml-cli test you'll notice the new test passes, but the hello world test fails This is because the agent's default behavior is to return "done_for_now" cp ./walkthrough/05c-agent.baml baml_src/agent.baml Verify tests pass npx baml-cli test # Chapter 6 - Customize Your Prompt with Reasoning In this section, we'll explore how to customize the prompt of the agent with reasoning steps. this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md) there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts) for this section, it will be helpful to leave the baml logs enabled export BAML_LOG=debug update the agent prompt to include a reasoning step cp ./walkthrough/06-agent.baml baml_src/agent.baml generate the updated client npx baml-cli generate now, you can try it out with a simple prompt npx tsx src/index.ts 'can you multiply 3 and 4' you should see output from the baml logs showing the reasoning steps #### optional challenge add a field to your tool output format that includes the reasoning steps in the output! # Chapter 7 - Customize Your Context Window In this section, we'll explore how to customize the context window of the agent. 
this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md) update the agent to pretty-print the Context window for the model cp ./walkthrough/07-agent.ts src/agent.ts Test the formatting BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' next, let's update the agent to use XML formatting instead this is a very popular format for passing data to a model, among other things, because of the token efficiency of XML. cp ./walkthrough/07b-agent.ts src/agent.ts let's try it out BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' lets update our tests to match the new output format cp ./walkthrough/07c-agent.baml baml_src/agent.baml check out the updated tests npx baml-cli test # Chapter 8 - Adding API Endpoints Add an Express server to expose the agent via HTTP. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Install Express and types npm install express && npm install --save-dev @types/express supertest Add the server implementation cp ./walkthrough/08-server.ts src/server.ts Start the server npx tsx src/server.ts Test with curl (in another terminal) curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you add 3 and 4"}' You should get an answer from the agent which includes the agentic trace, ending in a message like: {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."} # Chapter 9 - In-Memory State and Async Clarification Add state management and async clarification support. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. 
export BAML_LOG=off Add some simple in-memory state management for threads cp ./walkthrough/09-state.ts src/state.ts update the server to use the state management * Add thread state management using `ThreadStore` * return thread IDs and response URLs from the /thread endpoint * implement GET /thread/:id * implement POST /thread/:id/response cp ./walkthrough/09-server.ts src/server.ts Start the server npx tsx src/server.ts Test clarification flow curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you multiply 3 and xyz"}' # Chapter 10 - Adding Human Approval Add support for human approval of operations. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off update the server to handle human approvals * Import `handleNextStep` to execute approved actions * Add two payload types to distinguish approvals from responses * Handle responses and approvals differently in the endpoint * Show better error messages when things go wrong cp ./walkthrough/10-server.ts src/server.ts Add a few methods to the agent to handle approvals and responses cp ./walkthrough/10-agent.ts src/agent.ts Start the server npx tsx src/server.ts Test division with approval curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you divide 3 by 4"}' You should see: { "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b", "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } } ] } reject the request with another curl call, changing the thread ID curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}' You should see: the last
tool call is now `"intent":"divide","a":3,"b":5` { "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } }, { "type": "tool_response", "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\"" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 5, "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response" } } ] } now you can approve the operation curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": true}' you should see the final message includes the tool response and final result! ... { "type": "tool_response", "data": 0.5 }, { "type": "done_for_now", "message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!", "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response" } # Chapter 11 - Human Approvals over email in this section, we'll add support for human approvals over email. This will start a little bit contrived, just to get the concepts down - We'll start by invoking the workflow from the CLI but approvals for `divide` and `request_more_information` will be handled over email, then the final `done_for_now` answer will be printed back to the CLI While contrived, this is a great example of the flexibility you get from [factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md) for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. 
export BAML_LOG=off Install HumanLayer npm install humanlayer Update CLI to send `divide` and `request_more_information` to a human via email cp ./walkthrough/11-cli.ts src/cli.ts Run the CLI npx tsx src/index.ts 'can you divide 4 by 5' The last line of your program should mention the human review step nextStep { intent: 'divide', a: 4, b: 5 } HumanLayer: Requested human approval from HumanLayer cloud go ahead and respond to the email with some feedback: ![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true) you should get another email with an updated attempt based on your feedback! You can go ahead and approve this one: ![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true) and your final output will look like nextStep { intent: 'done_for_now', message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!' } The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask! let's implement the `request_more_information` flow as well cp ./walkthrough/11b-cli.ts src/cli.ts let's test the `request_more_information` flow by asking for a calculation with garbled input: npx tsx src/index.ts 'can you multiply 4 and xyz' You should get an email with a request for clarification Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else? you can respond with something like use 8 instead of xyz you should see a final result on the CLI like I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32.
as a final step, let's explore using a custom html template for the email cp ./walkthrough/11c-cli.ts src/cli.ts first try with divide: npx tsx src/index.ts 'can you divide 4 by 5' you should see a slightly different email with the custom template ![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true) feel free to run with the flow and then you can try updating the template to your liking (if you're using cursor, something as simple as highlighting the template and asking to "make it better" should do the trick) try triggering "request_more_information" as well! that's it - in the next chapter, we'll build a fully email-driven workflow agent that uses webhooks for human approval # Chapter XX - HumanLayer Webhook Integration the previous sections used the humanlayer SDK in "synchronous mode" - that means every time we wait for human approval, we sit in a loop polling until the human response is received. That's obviously not ideal, especially for production workloads, so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md) by updating the server to end processing after contacting a human, and use webhooks to receive the results. add code to initialize humanlayer in the server cp ./walkthrough/12-1-server-init.ts src/server.ts next, let's update the /thread endpoint to 1. handle requests asynchronously, returning immediately 2.
create a human contact on request_more_information and done_for_now calls Update the server to be able to handle request_clarification responses - remove the old /response endpoint and types - update the /thread endpoint to run processing asynchronously, return immediately - send a state.threadId when requesting human responses - add a handleHumanResponse function to process the human response - add a /webhook endpoint to handle the webhook response cp ./walkthrough/12a-server.ts src/server.ts Start the server in another terminal npx tsx src/server.ts now that the server is running, send a payload to the '/thread' endpoint __ do the response step __ now handle approvals for divide __ now also handle done_for_now ================================================ FILE: workshops/2025-05/sections/final/baml_src/agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? 
// After the human clarifies the garbled operand ("lets try 12 instead"),
// the model is expected to retry the multiplication with a = 3 and b = 12.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      can you multiply 3 and FD*(#F&& ?
      message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?
      lets try 12 instead
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // Assertion labels now name the field they actually check (they were swapped),
  // so a failing report points at the right operand.
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
// https://docs.boundaryml.com/docs/snippets/clients/retry
// Referenced by the CustomHaiku client (`retry_policy Constant`).
retry_policy Constant {
  max_retries 3
  // Strategy is optional
  strategy {
    type constant_delay
    // fixed wait between attempts, in milliseconds
    delay_ms 200
  }
}

// Referenced by the CustomGPT4oMini client (`retry_policy Exponential`).
retry_policy Exponential {
  max_retries 2
  // Strategy is optional
  strategy {
    type exponential_backoff
    // NOTE(review): presumably the delay starts at delay_ms and is scaled by
    // `multiplier` per attempt up to max_delay_ms - confirm against the BAML docs.
    delay_ms 300
    multiplier 1.5
    max_delay_ms 10000
  }
}
/** One entry in a thread's history: user input, a tool call, a tool response, ... */
export interface Event {
    type: string
    data: any;
}

/**
 * Ordered list of events that makes up the agent's entire context.
 * The thread is serialized to text and handed to the LLM on every turn.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Serialize every event, one tagged section per event, joined by newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strip leading spaces/tabs from every line so template indentation does not leak into the prompt. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Render one event as a pseudo-XML section. The tag is the tool intent when
     * the event carries one, otherwise the event type. Object payloads are
     * flattened to `key: value` lines (the intent is already in the tag).
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null === 'object'`, so null must be excluded before Object.keys().
        const isRecord = e.data !== null && typeof e.data === 'object';
        const body = isRecord
            ? Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")
            : e.data;
        // Close the tag that is opened so each section is well-formed.
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `)
    }

    /** Intent of the most recent event, or undefined for an empty thread / non-tool event. */
    private lastIntent(): string | undefined {
        return this.events[this.events.length - 1]?.data?.intent;
    }

    /** True when the last step is addressed to a human (clarification request or final answer). */
    awaitingHumanResponse(): boolean {
        const intent = this.lastIntent();
        return intent !== undefined && ['request_more_information', 'done_for_now'].includes(intent);
    }

    /** True when the last step is a tool call that needs human approval before it runs. */
    awaitingHumanApproval(): boolean {
        return this.lastIntent() === 'divide';
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Execute a calculator tool call and append its result to the thread as a
 * `tool_response` event.
 *
 * @returns the same thread instance, mutated in place
 * @throws Error when the intent is not one of the calculator tools
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Fail loudly instead of silently resolving to undefined.
            throw new Error(`unknown tool: ${(nextStep as { intent: string }).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Ask the LLM for the next step and execute it, repeating until a step needs
 * a human: a final answer, a clarification request, or a `divide` call that
 * must be approved. The pending step is always the thread's last event.
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
        }
    }
}
/**
 * Command-line entry point: turns argv into the first `user_input` event, runs
 * the agent loop, and keeps asking a human (email or terminal) whenever the
 * agent pauses, until the agent reports `done_for_now`.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        // Always continue from the thread the loop handed back.
        newThread.events.push(responseEvent);
        newThread = await agentLoop(newThread);
        lastEvent = newThread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Route a paused step to a human: over email when HUMANLAYER_API_KEY is set,
 * otherwise on the terminal. `divide` is an approval request, not a question,
 * so the terminal path handles it separately.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    }
    if (lastEvent.data.intent === "divide") {
        return await askApprovalCLI(lastEvent);
    }
    return await askHumanCLI(lastEvent.data.message);
}

/** Print `prompt`, read one line from stdin, and release the readline interface. */
function promptCLI(prompt: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(prompt, (answer: string) => {
            // Close so repeated prompts do not stack listeners on stdin.
            rl.close();
            resolve(answer);
        });
    });
}

/** Ask the human a free-form question on the terminal. */
async function askHumanCLI(message: string): Promise<Event> {
    const answer = await promptCLI(`${message}\n> `);
    return { type: "human_response", data: answer };
}

/**
 * Terminal counterpart of the email approval flow for `divide`:
 * "y"/"yes" runs the division, anything else is returned to the agent as
 * rejection feedback.
 */
async function askApprovalCLI(lastEvent: Event): Promise<Event> {
    const { intent, a, b } = lastEvent.data;
    const answer = (await promptCLI(
        `agent is requesting approval for ${intent} with args: a=${a}, b=${b}\n` +
        `type "y" to approve, or type feedback to reject\n> `
    )).trim();

    if (/^y(es)?$/i.test(answer)) {
        const result = a / b;
        console.log("tool_response", result);
        return { "type": "tool_response", "data": result };
    }
    return {
        "type": "tool_response",
        "data": `user denied operation ${intent} with feedback: ${answer}`
    };
}
email: { address: process.env.HUMANLAYER_EMAIL, // custom email body - jinja template: `{% if type == 'request_more_information' %} {{ event.spec.msg }} {% else %} agent {{ event.run_id }} is requesting approval for {{event.spec.fn}} with args: {{event.spec.kwargs}}

reply to this email to approve {% endif %}` } } }) if (lastEvent.data.intent === "request_more_information") { // fetch response synchronously - this will block until reply const response = await hl.fetchHumanResponse({ spec: { msg: lastEvent.data.message } }) return { "type": "tool_response", "data": response } } if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply const response = await hl.fetchHumanApproval({ spec: { fn: "divide", kwargs: { a: lastEvent.data.a, b: lastEvent.data.b } } }) if (response.approved) { const result = lastEvent.data.a / lastEvent.data.b; console.log("tool_response", result); return { "type": "tool_response", "data": result }; } else { return { "type": "tool_response", "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}` }; } } throw new Error(`unknown tool: ${lastEvent.data.intent}`) } ================================================ FILE: workshops/2025-05/sections/final/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05/sections/final/src/server.ts ================================================ import express, { Request, Response } from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); const getHumanlayer = () => { const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; if (!HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; if (!HUMANLAYER_API_KEY) { throw new 
// Ask a human for input when the thread is paused on a human-facing step
// (request_more_information / done_for_now). The thread id travels in
// `state` so the webhook can find the thread again.
const requestHumanContact = async (threadId: string, thread: Thread): Promise<void> => {
    if (!thread.awaitingHumanResponse()) {
        return;
    }
    const lastEvent = thread.events[thread.events.length - 1];
    const hl = getHumanlayer();
    // awaited so a failed request surfaces in the caller's catch
    await hl.createHumanContact({
        spec: {
            msg: lastEvent.data.message,
            state: {
                thread_id: threadId,
            }
        }
    });
};

// Run the agent until it pauses, persist the result, then contact the human if needed.
const runAgent = async (threadId: string, thread: Thread): Promise<void> => {
    const newThread = await agentLoop(thread);
    store.update(threadId, newThread);
    await requestHumanContact(threadId, newThread);
};

// POST /thread - Start new thread
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // Create the id up front so the caller can poll GET /thread/:id.
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    runAgent(threadId, thread).catch((err) => {
        console.error(`thread ${threadId} failed`, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }
    res.json(thread);
});

type WebhookResponse = V1Beta2HumanContactCompleted;

// Append the human's reply to the thread and resume the agent loop.
const handleHumanResponse = async (threadId: string, thread: Thread, humanResponse: string): Promise<void> => {
    thread.events.push({
        type: "human_response",
        data: humanResponse
    });
    await runAgent(threadId, thread);
};

// POST /webhook - human contact completed, resume the matching thread
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response?.event?.status?.response as string;

    const threadId = response?.event?.spec?.state?.thread_id;
    if (!threadId) {
        res.status(400).json({ error: "Thread ID not found" });
        return;
    }

    const thread = store.get(threadId);
    if (!thread) {
        res.status(404).json({ error: "Thread not found" });
        return;
    }

    if (!thread.awaitingHumanResponse()) {
        res.status(400).json({ error: "Thread is not awaiting human response" });
        return;
    }

    // Acknowledge right away; the agent continues in the background.
    res.json({ status: "ok" });

    handleHumanResponse(threadId, thread, humanResponse).catch((err) => {
        console.error(`thread ${threadId} failed`, err);
    });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});
// you can replace this with any simple state management,
// e.g. redis, sqlite, postgres, etc
/**
 * In-memory thread registry keyed by a random UUID.
 * Contents live only as long as the process.
 */
export class ThreadStore {
    // Explicit key/value types so get() is typed as Thread | undefined.
    private threads: Map<string, Thread> = new Map<string, Thread>();

    /** Store a new thread and return the id generated for it. */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }

    /** Look up a thread; undefined when the id is unknown. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Replace the thread stored under `id` (stores it even if the id is new). */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
{ "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05/walkthrough/00-tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05/walkthrough/01-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05/walkthrough/01-agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
/**
 * Command-line entry point: joins the CLI arguments into one message,
 * wraps it in a single-event thread, runs the agent once and prints
 * whatever the agent returns.
 */
export async function cli() {
    // argv[0] is node and argv[1] is the script; the rest is the user's message
    const [, , ...words] = process.argv;

    if (!words.length) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // the user's message becomes the first event of a fresh thread
    const firstEvent: Event = { type: "user_input", data: words.join(" ") };
    const outcome = await agentLoop(new Thread([firstEvent]));

    console.log(outcome);
}
{{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05/walkthrough/02-tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05/walkthrough/03-agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
/**
 * Ask the LLM for the next step until it answers `done_for_now`.
 * At this stage only `add` is executed; any other intent throws.
 *
 * @returns the final message for the user
 * @throws Error for an intent this loop does not handle yet
 */
export async function agentLoop(thread: Thread): Promise<string> {

    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        switch (nextStep.intent) {
            case "done_for_now":
                // response to human, return the next step object
                return nextStep.message;
            case "add": {
                // braces give `result` its own scope inside the case clause
                thread.events.push({
                    "type": "tool_call",
                    "data": nextStep
                });
                const result = nextStep.a + nextStep.b;
                console.log("tool_response", result);
                thread.events.push({
                    "type": "tool_response",
                    "data": result
                });
                continue;
            }
            default:
                throw new Error(`Unknown intent: ${nextStep.intent}`);
        }
    }
}
/**
 * Execute one calculator tool call and record its result on the thread as a
 * `tool_response` event.
 *
 * @returns the same thread instance, mutated in place
 * @throws Error when the intent is not a calculator tool
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Previously fell through and resolved to undefined.
            throw new Error(`Unknown intent: ${(nextStep as { intent: string }).intent}`);
    }
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
{{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } } ================================================ FILE: workshops/2025-05/walkthrough/04b-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(hello, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(math_operation, {{this.intent == "multiply"}}) } ================================================ FILE: workshops/2025-05/walkthrough/04c-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" 
} "# } @@assert(intent, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" }, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } ================================================ FILE: workshops/2025-05/walkthrough/05-agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" 
/**
 * One entry in a Thread's event list.
 */
export interface Event {
    // event kind; this file pushes "tool_call" (agentLoop) and "tool_response"
    // (handleNextStep), and 05-cli.ts pushes "user_input" and "human_response"
    type: string
    // payload: the model's next step for "tool_call", the numeric result for
    // "tool_response", the raw text for the human-originated events
    data: any;
}
/**
 * Executes one calculator tool call and records the result on the thread.
 *
 * Logs the result, pushes a single `tool_response` event whose `data` is the
 * numeric result, and resolves with the same (mutated) thread instance.
 *
 * Division by zero is not special-cased: the result is whatever JavaScript's
 * `/` operator produces.
 *
 * @param nextStep the tool call chosen by the model (`intent`, `a`, `b`)
 * @param thread   the thread to append the `tool_response` event to
 * @returns the thread that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // One entry per calculator intent, so the log/push/return sequence is
    // written once instead of once per case.
    const operations: Record<CalculatorTool["intent"], (a: number, b: number) => number> = {
        add: (a, b) => a + b,
        subtract: (a, b) => a - b,
        multiply: (a, b) => a * b,
        divide: (a, b) => a / b,
    };

    const result = operations[nextStep.intent](nextStep.a, nextStep.b);
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
/**
 * Prints `message` followed by a `> ` prompt and resolves with the line the
 * user types on stdin.
 *
 * @param message text shown to the user before the prompt
 * @returns the answer exactly as passed to the readline callback
 */
async function askHuman(message: string): Promise<string> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // cli() calls askHuman once per clarification round; close the
            // interface so each round does not leave another one attached to stdin.
            rl.close();
            resolve(answer);
        });
    });
}
// The thread already contains a clarification request and the human's reply
// ("lets try 12 instead"); the expected next step is multiply with a=3, b=12.
// The fixture is a valid JSON array (no trailing comma), and each assert
// label names the field it checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
// The thread already contains a clarification request and the human's reply
// ("lets try 12 instead"); the expected next step is multiply with a=3, b=12.
// The fixture is a valid JSON array (no trailing comma), and each assert
// label names the field it checks.
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
// The second operand in the user input is "feee9ff10"; the assert expects the
// step to be a request_more_information intent rather than a tool call.
test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}
/**
 * Holds the ordered list of events for one agent conversation and knows how
 * to render that list as the text handed to the model.
 */
export class Thread {
    // `events` is declared and assigned through the parameter property.
    constructor(public events: Event[]) {}

    /**
     * Renders every event as pretty-printed JSON (two-space indent).
     */
    serializeForLLM(): string {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        const indentWidth = 2;
        return JSON.stringify(this.events, undefined, indentWidth);
    }
}
/**
 * Holds the ordered list of events for one agent conversation and renders
 * them as tagged text for the model.
 */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Serializes every event with serializeOneEvent and joins them with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Removes leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as `<tag>` followed by its body.
     *
     * The tag is `data.intent` when present, otherwise the event type.
     * Non-object data is interpolated as-is; object data becomes one
     * `key: value` line per key, with `intent` omitted because it is the tag.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // `typeof null === 'object'`, so null must be excluded explicitly;
        // otherwise Object.keys(null) throws even though the tag lookup above
        // already tolerates a null payload.
        const isPlainValue = typeof e.data !== 'object' || e.data === null;
        const body = isPlainValue
            ? e.data
            : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n");
        return this.trimLeadingWhitespace(` <${tag}> ${body} `)
    }
}
function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05/walkthrough/07c-agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? 
// POST /thread - Start new thread
// Expects a JSON body with a string "message"; runs the agent loop on a new
// thread seeded with that message and responds with the resulting thread.
app.post('/thread', async (req, res) => {
    // req.body may be absent or lack "message" when the client sends no JSON;
    // reject that up front instead of starting a thread with undefined input.
    const message = req.body?.message;
    if (typeof message !== "string" || message.length === 0) {
        res.status(400).json({ error: "Request body must include a non-empty string \"message\"" });
        return;
    }

    try {
        const thread = new Thread([{
            type: "user_input",
            data: message
        }]);
        const result = await agentLoop(thread);
        res.json(result);
    } catch (err) {
        // Catch explicitly so the client always gets a response, whether or not
        // the installed Express version forwards rejections from async handlers.
        console.error("agent loop failed", err);
        res.status(500).json({ error: "Agent loop failed" });
    }
});
// POST /thread/:id/response - Handle clarification response
// Looks up the stored thread, appends the caller's message as a
// "human_response" event, re-runs the agent loop, and responds with the
// updated thread. Responds 404 when the id is not in the store.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    
    // the human's reply is taken from the "message" field of the JSON body
    thread.events.push({
        type: "human_response",
        data: req.body.message
    });
    
    // loop until stop event
    const newThread = await agentLoop(thread);
    
    store.update(req.params.id, newThread);

    // NOTE(review): lastEvent is an element of newThread.events, the same thread
    // object just passed to store.update, so response_url is written onto that
    // event itself — confirm that keeping it on the stored thread is intended.
    const lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);
    
    res.json(newThread);
});
/**
 * In-memory registry of threads keyed by a generated UUID.
 *
 * Threads are held by reference in a Map; nothing is copied or written
 * anywhere else.
 */
export class ThreadStore {
    // Map requires explicit key/value type arguments to compile.
    private threads: Map<string, Thread> = new Map();

    /** Stores `thread` under a fresh random UUID and returns that id. */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }

    /** Returns the thread stored under `id`, or undefined when there is none. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Stores `thread` under `id`, replacing any existing entry. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}
/**
 * Repeatedly asks the model for the next step and applies it until a step
 * needs a human.
 *
 * Every step is appended to the thread as a `tool_call` event. The loop
 * returns the thread when the step is `done_for_now`,
 * `request_more_information`, or `divide` (returned unexecuted so a human can
 * approve it). `add`, `subtract` and `multiply` are executed through
 * handleNextStep and the loop continues.
 *
 * @param thread the thread to continue; it is mutated in place
 * @returns the thread, whose last event is the step that stopped the loop
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}
// POST /thread/:id/response - Handle clarification response
// Accepts either a { type: "response" } or { type: "approval" } payload,
// applies it to the stored thread, re-runs the agent loop, and responds with
// the updated thread. Responds 404 for an unknown id and 400 when the payload
// type does not match what the thread is waiting for.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    let lastEvent = thread.events[thread.events.length - 1];

    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
        // push feedback onto the thread
        // `comment` is optional on ApprovalPayload; leave the feedback clause out
        // when it is absent rather than recording the literal text "undefined".
        const denial = body.comment === undefined
            ? "user denied the operation"
            : `user denied the operation with feedback: "${body.comment}"`;
        thread.events.push({
            type: "tool_response",
            data: denial
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(lastEvent.data, thread);
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    // loop until stop event
    const newThread = await agentLoop(thread);

    store.update(req.params.id, newThread);

    lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(newThread);
});
// print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); }

/**
 * Ask a human about the agent's latest event and return their reply as an Event.
 * Uses the HumanLayer email channel when HUMANLAYER_API_KEY is set; otherwise
 * prompts on the terminal with the event's message.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}

/**
 * Print `message` on stdout, read one line from stdin, and resolve with a
 * "human_response" event whose data is the typed answer.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // cli() calls this once per loop iteration; close the interface so
            // they do not pile up on the same stdin/stdout pair
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Request approval for a "divide" tool call through HumanLayer email and return
 * the outcome as a "tool_response" event.
 *
 * Throws when HUMANLAYER_EMAIL is unset, or when the event's intent is anything
 * other than "divide".
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            // approved: run the tool locally and report the result
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            // denied: report the reviewer's feedback in place of a result
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================ FILE: workshops/2025-05/walkthrough/11b-cli.ts ================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

export async function cli() { // Get command line arguments, 
skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread let newThread = await agentLoop(thread); let lastEvent = newThread.events.slice(-1)[0]; while (lastEvent.data.intent !== "done_for_now") { const responseEvent = await askHuman(lastEvent); thread.events.push(responseEvent); newThread = await agentLoop(thread); lastEvent = newThread.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); }

/**
 * Ask a human about the agent's latest event and return their reply as an Event.
 * Uses the HumanLayer email channel when HUMANLAYER_API_KEY is set; otherwise
 * prompts on the terminal with the event's message.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}

/**
 * Print `message` on stdout, read one line from stdin, and resolve with a
 * "human_response" event whose data is the typed answer.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // cli() calls this once per loop iteration; close the interface so
            // they do not pile up on the same stdin/stdout pair
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contact a human through HumanLayer email and return the outcome as a
 * "tool_response" event.
 *
 * - "request_more_information": sends the event's message and returns the reply.
 * - "divide": requests approval, then either performs the division or returns
 *   the reviewer's denial feedback.
 *
 * Throws when HUMANLAYER_EMAIL is unset, or for any other intent.
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        return {
            "type": "tool_response",
            "data": response
        }
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            // approved: run the tool locally and report the result
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            // denied: report the reviewer's feedback in place of a result
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================ FILE: workshops/2025-05/walkthrough/11c-cli.ts ================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Command-line entry point: joins the CLI arguments into one message, runs the
 * agent loop on a new thread, and keeps asking a human for input until the
 * last event's intent is "done_for_now". Prints the final message and exits.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        thread.events.push(responseEvent);
        newThread = await agentLoop(thread);
        lastEvent = newThread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

async function askHuman(lastEvent: Event): Promise<Event> { if (process.env.HUMANLAYER_API_KEY) { return await 
askHumanEmail(lastEvent); } else { return await askHumanCLI(lastEvent.data.message); } }

/**
 * Print `message` on stdout, read one line from stdin, and resolve with a
 * "human_response" event whose data is the typed answer.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // cli() calls this once per loop iteration; close the interface so
            // they do not pile up on the same stdin/stdout pair
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contact a human through HumanLayer email, using a custom jinja email body,
 * and return the outcome as a "tool_response" event.
 *
 * - "request_more_information": sends the event's message and returns the reply.
 * - "divide": requests approval, then either performs the division or returns
 *   the reviewer's denial feedback.
 *
 * Throws when HUMANLAYER_EMAIL is unset, or for any other intent.
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %} {{ event.spec.msg }} {% else %} agent {{ event.run_id }} is requesting approval for {{event.spec.fn}} with args: {{event.spec.kwargs}}

reply to this email to approve {% endif %}`
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        return {
            "type": "tool_response",
            "data": response
        }
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            // approved: run the tool locally and report the result
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            // denied: report the reviewer's feedback in place of a result
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================ FILE: workshops/2025-05/walkthrough/12-1-server-init.ts ================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

/**
 * Build a HumanLayer client that contacts HUMANLAYER_EMAIL by email.
 * Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is missing from the environment.
 */
const getHumanlayer = () => {
    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address: HUMANLAYER_EMAIL }
        }
    });
}

// POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); 
const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); });

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});

// Body accepted by POST /thread/:id/response when answering an approval request
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

// Body accepted by POST /thread/:id/response when answering a clarification request
type ResponsePayload = {
    type: "response";
    response: string;
}

type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Applies the human's reply to the stored thread, re-runs the agent loop and
// returns the updated thread. 404 if the thread is unknown; 400 if the payload
// type does not match what the thread is waiting for.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    let lastEvent = thread.events[thread.events.length - 1];

    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
        // push feedback onto the thread
        thread.events.push({
            type: "tool_response",
            data: `user denied the operation with feedback: "${body.comment}"`
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(lastEvent.data, thread);
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    // loop until stop event
    const result = await agentLoop(thread);

    store.update(req.params.id, result);

    lastEvent = result.events[result.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(result);
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================ FILE: workshops/2025-05/walkthrough/12-server.ts ================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

/**
 * Build a HumanLayer client that contacts HUMANLAYER_EMAIL by email.
 * Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is missing from the environment.
 */
const getHumanlayer = () => {
    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address: HUMANLAYER_EMAIL }
        }
    });
}

// POST /thread - Start new thread
// Responds { status: "processing" } right away and runs the agent loop in the
// background. If the thread ends up awaiting a human response, a HumanLayer
// contact is created with the thread id stored in its `state`.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (thread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - returns immediately
            // awaited so that a failed call is reported by the catch below
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the HTTP response has already been sent, so the only place left to
        // surface a failure is the log; without this the rejection is unhandled
        console.error("background agent run failed", err);
    });

    res.json({ status: "processing" });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});

type WebhookResponse = V1Beta2HumanContactCompleted;

// empty placeholder; nothing in this file calls it
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - validates an incoming human-contact webhook against the store
// NOTE(review): when every check passes no response is sent and `humanResponse`
// is unused - looks like scaffolding to be completed; confirm.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================ FILE: workshops/2025-05/walkthrough/12a-server.ts ================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

const getHumanlayer = () => { const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; if (!HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; if (!HUMANLAYER_API_KEY) { throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); } return humanlayer({ 
runId: `12fa-agent`, contactChannel: { email: { address: HUMANLAYER_EMAIL } } }); }

// POST /thread - Start new thread
// Responds { status: "processing" } right away and runs the agent loop in the
// background. If the thread ends up awaiting a human response, a HumanLayer
// contact is created with the thread id stored in its `state`.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (thread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - returns immediately
            // awaited so that a failed call is reported by the catch below
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the HTTP response has already been sent, so the only place left to
        // surface a failure is the log; without this the rejection is unhandled
        console.error("background agent run failed", err);
    });

    res.json({ status: "processing" });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});

type WebhookResponse = V1Beta2HumanContactCompleted;

// empty placeholder; nothing in this file calls it
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - validates an incoming human-contact webhook against the store
// NOTE(review): when every check passes no response is sent and `humanResponse`
// is unused - looks like scaffolding to be completed; confirm.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================ FILE: workshops/2025-05/walkthrough/12aa-server.ts ================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

/**
 * Build a HumanLayer client that contacts HUMANLAYER_EMAIL by email.
 * Throws when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is missing from the environment.
 */
const getHumanlayer = () => {
    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address: HUMANLAYER_EMAIL }
        }
    });
}

// POST /thread - Start new thread
// Responds { status: "processing" } right away and runs the agent loop in the
// background. If the thread ends up awaiting a human response, a HumanLayer
// contact is created with the thread id stored in its `state`.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (thread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - returns immediately
            // awaited so that a failed call is reported by the catch below
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the HTTP response has already been sent, so the only place left to
        // surface a failure is the log; without this the rejection is unhandled
        console.error("background agent run failed", err);
    });

    res.json({ status: "processing" });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});

type WebhookResponse = V1Beta2HumanContactCompleted;

// empty placeholder; nothing in this file calls it
const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - validates an incoming human-contact webhook against the store
// NOTE(review): when every check passes no response is sent and `humanResponse`
// is unused - looks like scaffolding to be completed; confirm.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };

================================================ FILE: workshops/2025-05/walkthrough/12b-server.ts ================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { V1Beta2EmailEventReceived, V1Beta2FunctionCallCompleted, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// POST /thread - Start new thread
// Runs the agent loop to its next stop event before responding, and returns
// the thread together with its id.
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { 
return res.status(404).json({ error: "Thread not found" }); } res.json(thread); });

// Payload sent when a human approves or rejects a pending tool call
type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

// Payload sent when a human answers a clarification request
type ResponsePayload = {
    type: "response";
    response: string;
}

type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Records the human's reply on the stored thread, resumes the agent loop and
// returns the updated thread. 404 for an unknown thread; 400 when the payload
// type is not what the thread is currently waiting for.
app.post('/thread/:id/response', async (req, res) => {
    const threadId = req.params.id;

    let thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    // captured before any new event is pushed: this is the pending request
    const pendingEvent = thread.events.slice(-1)[0];

    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval') {
        if (body.approved) {
            // approved, run the tool, pushing results onto the thread
            await handleNextStep(pendingEvent.data, thread);
        } else {
            // push feedback onto the thread
            thread.events.push({
                type: "tool_response",
                data: `user denied the operation with feedback: "${body.comment}"`
            });
        }
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    // loop until stop event
    const result = await agentLoop(thread);
    store.update(threadId, result);

    const finalEvent = result.events.slice(-1)[0];
    finalEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", finalEvent);

    res.json(result);
});

type WebhookResponse = V1Beta2HumanContactCompleted;

app.post('/webhook/response', async (req, res) => { console.log("webhook response", req.body); const response = req.body as WebhookResponse; // response is guaranteed to be set on a webhook const humanResponse: string = response.event.status?.response as string; const 
threadId = response.event.spec.state?.thread_id; if (!threadId) { return res.status(400).json({ error: "Thread ID not found" }); } const thread = store.get(threadId); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } if (!thread.awaitingHumanResponse()) { return res.status(400).json({ error: "Thread is not awaiting human response" }); } thread.events.push({ type: "human_response", data: response.event.status?.response }); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05/walkthrough.md ================================================ # Building the 12-factor agent template from scratch Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology. ## Cleanup Make sure you're starting from a clean slate Clean up existing files rm -rf baml_src/ && rm -rf src/ ## Chapter 0 - Hello World Let's start with a basic TypeScript setup and a hello world program. This guide is written in TypeScript (yes, a python version is coming soon) There are many checkpoints between every file edit in the workshop steps, so even if you aren't super familiar with typescript, you should be able to keep up and run each example. To run this guide, you'll need a relatively recent version of nodejs and npm installed You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine brew install node@20 You should see the node version node --version Copy initial package.json cp ./walkthrough/00-package.json package.json
show file ```json // ./walkthrough/00-package.json { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ```
Install dependencies npm install Copy tsconfig.json cp ./walkthrough/00-tsconfig.json tsconfig.json
show file ```json // ./walkthrough/00-tsconfig.json { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ```
add .gitignore cp ./walkthrough/00-.gitignore .gitignore
show file ```gitignore // ./walkthrough/00-.gitignore baml_client/ node_modules/ ```
Create src folder Add a simple hello world index.ts cp ./walkthrough/00-index.ts src/index.ts
show file ```ts // ./walkthrough/00-index.ts async function hello(): Promise<void> { console.log('hello, world!') } async function main() { await hello() } main().catch(console.error) ```
Run it to verify npx tsx src/index.ts You should see: hello, world! ## Chapter 1 - CLI and Agent Loop Now let's add BAML and create our first agent with a CLI interface. First, we'll need to install [BAML](https://github.com/boundaryml/baml) which is a tool for prompting and structured outputs. npm install @boundaryml/baml Initialize BAML npx baml-cli init Remove default resume.baml rm baml_src/resume.baml Add our starter agent, a single baml prompt that we'll build on cp ./walkthrough/01-agent.baml baml_src/agent.baml
show file ```rust // ./walkthrough/01-agent.baml class DoneForNow { intent "done_for_now" message string } function DetermineNextStep( thread: string ) -> DoneForNow { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ```
Generate BAML client code npx baml-cli generate Enable BAML logging for this section export BAML_LOG=debug Add the CLI interface cp ./walkthrough/01-cli.ts src/cli.ts
show file ```ts // ./walkthrough/01-cli.ts // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ```
Update index.ts to use the CLI ```diff src/index.ts +import { cli } from "./cli" + async function hello(): Promise<void> { console.log('hello, world!') async function main() { - await hello() + await cli() } ```
skip this step cp ./walkthrough/01-index.ts src/index.ts
Add the agent implementation cp ./walkthrough/01-agent.ts src/agent.ts
show file ```ts // ./walkthrough/01-agent.ts import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } // right now this just runs one turn with the LLM, but // we'll update this function to handle all the agent logic export async function agentLoop(thread: Thread): Promise<AgentResponse> { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); return nextStep; } ```
The BAML code is configured to use OPENAI_API_KEY by default As you're testing, you can change the model / provider to something else as you please client "openai/gpt-4o" [Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms) For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider. If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key. export OPENAI_API_KEY=... Try it out npx tsx src/index.ts hello you should see a familiar response from the model { intent: 'done_for_now', message: 'Hello! How can I assist you today?' } ## Chapter 2 - Add Calculator Tools Let's add some calculator tools to our agent. Let's start by adding a tool definition for the calculator These are simple structured outputs that we'll ask the model to return as a "next step" in the agentic loop. cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
show file ```rust // ./walkthrough/02-tool_calculator.baml type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ```
Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps ```diff baml_src/agent.baml function DetermineNextStep( thread: string -) -> DoneForNow { +) -> CalculatorTools | DoneForNow { client "openai/gpt-4o" ```
skip this step cp ./walkthrough/02-agent.baml baml_src/agent.baml
Generate updated BAML client npx baml-cli generate Try out the calculator npx tsx src/index.ts 'can you add 3 and 4' You should see a tool call to the calculator { intent: 'add', a: 3, b: 4 } ## Chapter 3 - Process Tool Calls in a Loop Now let's add a real agentic loop that can run the tools and get a final answer from the LLM. First, let's update the agent to handle the tool call ```diff src/agent.ts } -// right now this just runs one turn with the LLM, but -// we'll update this function to handle all the agent logic -export async function agentLoop(thread: Thread): Promise<AgentResponse> { - const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); - return nextStep; + + +export async function agentLoop(thread: Thread): Promise<string> { + + while (true) { + const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); + console.log("nextStep", nextStep); + + switch (nextStep.intent) { + case "done_for_now": + // response to human, return the next step object + return nextStep.message; + case "add": + thread.events.push({ + "type": "tool_call", + "data": nextStep + }); + const result = nextStep.a + nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + continue; + default: + throw new Error(`Unknown intent: ${nextStep.intent}`); + } + } } ```
skip this step cp ./walkthrough/03-agent.ts src/agent.ts
Now, lets try it out npx tsx src/index.ts 'can you add 3 and 4' you should see the agent call the tool and then return the result { intent: 'done_for_now', message: 'The sum of 3 and 4 is 7.' } For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output export BAML_LOG=off Try a multi-step calculation npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result' you'll notice that tools like multiply and divide are not available npx tsx src/index.ts 'can you multiply 3 and 4' next, let's add handlers for the rest of the calculator tools ```diff src/agent.ts -import { b } from "../baml_client"; +import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; -// tool call or a respond to human tool -type AgentResponse = Awaited>; - export interface Event { type: string } +export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; +export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { + let result: number; + switch (nextStep.intent) { + case "add": + result = nextStep.a + nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "subtract": + result = nextStep.a - nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "multiply": + result = nextStep.a * nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "divide": + result = nextStep.a / nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + } +} export async function agentLoop(thread: Thread): Promise { console.log("nextStep", nextStep); + thread.events.push({ + "type": "tool_call", + "data": nextStep + }); + switch 
(nextStep.intent) { case "done_for_now": return nextStep.message; case "add": - thread.events.push({ - "type": "tool_call", - "data": nextStep - }); - const result = nextStep.a + nextStep.b; - console.log("tool_response", result); - thread.events.push({ - "type": "tool_response", - "data": result - }); - continue; - default: - throw new Error(`Unknown intent: ${nextStep.intent}`); + case "subtract": + case "multiply": + case "divide": + thread = await handleNextStep(nextStep, thread); } } ```
skip this step cp ./walkthrough/03b-agent.ts src/agent.ts
Test subtraction npx tsx src/index.ts 'can you subtract 3 from 4' now, let's test the multiplication tool npx tsx src/index.ts 'can you multiply 3 and 4' finally, let's test a more complex calculation with multiple operations npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' ## Chapter 4 - Add Tests to agent.baml Let's add some tests to our BAML agent. to start, leave the baml logs enabled export BAML_LOG=debug next, let's add some tests to the agent We'll start with a simple test that checks the agent's ability to handle a basic calculation. ```diff baml_src/agent.baml "# } + +test MathOperation { + functions [DetermineNextStep] + args { + thread #" + { + "type": "user_input", + "data": "can you multiply 3 and 4?" + } + "# + } +} + ```
skip this step cp ./walkthrough/04-agent.baml baml_src/agent.baml
Run the tests npx baml-cli test now, let's improve the test with assertions! Assertions are a great way to make sure the agent is working as expected, and can easily be extended to check for more complex behavior. ```diff baml_src/agent.baml "# } + @@assert(hello, {{this.intent == "done_for_now"}}) } "# } + @@assert(math_operation, {{this.intent == "multiply"}}) } ```
skip this step cp ./walkthrough/04b-agent.baml baml_src/agent.baml
Run the tests npx baml-cli test as you add more tests, you can disable the logs to keep the output clean. You may want to turn them on as you iterate on specific tests. export BAML_LOG=off now, let's add some more complex test cases, where we resume from in the middle of an in-progress agentic context window ```diff baml_src/agent.baml "# } - @@assert(hello, {{this.intent == "done_for_now"}}) + @@assert(intent, {{this.intent == "done_for_now"}}) } "# } - @@assert(math_operation, {{this.intent == "multiply"}}) + @@assert(intent, {{this.intent == "multiply"}}) } +test LongMath { + functions [DetermineNextStep] + args { + thread #" + [ + { + "type": "user_input", + "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" + }, + { + "type": "tool_call", + "data": { + "intent": "multiply", + "a": 3, + "b": 4 + } + }, + { + "type": "tool_response", + "data": 12 + }, + { + "type": "tool_call", + "data": { + "intent": "divide", + "a": 12, + "b": 2 + } + }, + { + "type": "tool_response", + "data": 6 + }, + { + "type": "tool_call", + "data": { + "intent": "add", + "a": 6, + "b": 12 + } + }, + { + "type": "tool_response", + "data": 18 + } + ] + "# + } + @@assert(intent, {{this.intent == "done_for_now"}}) + @@assert(answer, {{"18" in this.message}}) +} + ```
skip this step cp ./walkthrough/04c-agent.baml baml_src/agent.baml
let's try to run it npx baml-cli test ## Chapter 5 - Multiple Human Tools In this section, we'll add support for multiple tools that serve to contact humans. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off first, let's add a tool that can request clarification from a human this will be different from the "done_for_now" tool, and can be used to more flexibly handle different types of human interactions in your agent. ```diff baml_src/agent.baml +// human tools are async requests to a human +type HumanTools = ClarificationRequest | DoneForNow + +class ClarificationRequest { + intent "request_more_information" @description("you can request more information from me") + message string +} + class DoneForNow { intent "done_for_now" - message string + + message string @description(#" + message to send to the user about the work that was done. + "#) } function DetermineNextStep( thread: string -) -> CalculatorTools | DoneForNow { +) -> HumanTools | CalculatorTools { client "openai/gpt-4o" } + ```
skip this step cp ./walkthrough/05-agent.baml baml_src/agent.baml
next, let's re-generate the client code NOTE - if you're using the VSCode extension for BAML, the client will be regenerated automatically when you save the file in your editor. npx baml-cli generate now, let's update the agent to use the new tool ```diff src/agent.ts } -export async function agentLoop(thread: Thread): Promise { +export async function agentLoop(thread: Thread): Promise { while (true) { switch (nextStep.intent) { case "done_for_now": - // response to human, return the next step object - return nextStep.message; + case "request_more_information": + // response to human, return the thread + return thread; case "add": case "subtract": ```
skip this step cp ./walkthrough/05-agent.ts src/agent.ts
next, let's update the CLI to handle clarification requests by requesting input from the user on the CLI ```diff src/cli.ts // cli.ts lets you invoke the agent loop from the command line -import { agentLoop, Thread, Event } from "./agent"; +import { agentLoop, Thread, Event } from "../src/agent"; + + export async function cli() { // Get command line arguments, skipping the first two (node and script name) // Run the agent loop with the thread const result = await agentLoop(thread); - console.log(result); + let lastEvent = result.events.slice(-1)[0]; + + while (lastEvent.data.intent === "request_more_information") { + const message = await askHuman(lastEvent.data.message); + thread.events.push({ type: "human_response", data: message }); + const result = await agentLoop(thread); + lastEvent = result.events.slice(-1)[0]; + } + + // print the final result + // optional - you could loop here too + console.log(lastEvent.data.message); + process.exit(0); } + +async function askHuman(message: string) { + const readline = require('readline').createInterface({ + input: process.stdin, + output: process.stdout + }); + + return new Promise((resolve) => { + readline.question(`${message}\n> `, (answer: string) => { + resolve(answer); + }); + }); +} ```
skip this step cp ./walkthrough/05-cli.ts src/cli.ts
let's try it out npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& ' next, let's add a test that checks the agent's ability to handle a clarification request ```diff baml_src/agent.baml + +test MathOperationWithClarification { + functions [DetermineNextStep] + args { + thread #" + [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] + "# + } + @@assert(intent, {{this.intent == "request_more_information"}}) +} + +test MathOperationPostClarification { + functions [DetermineNextStep] + args { + thread #" + [ + {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, + {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}}, + {"type":"human_response","data":"lets try 12 instead"}, + ] + "# + } + @@assert(intent, {{this.intent == "multiply"}}) + @@assert(a, {{this.b == 12}}) + @@assert(b, {{this.a == 3}}) +} + + + ```
skip this step cp ./walkthrough/05b-agent.baml baml_src/agent.baml
and now we can run the tests again npx baml-cli test you'll notice the new test passes, but the hello world test fails. This is because the test still asserts the agent's old default behavior of returning "done_for_now"; now that a clarification tool is available, update the assertion to expect "request_more_information" ```diff baml_src/agent.baml "# } - @@assert(intent, {{this.intent == "done_for_now"}}) + @@assert(intent, {{this.intent == "request_more_information"}}) } ```
skip this step cp ./walkthrough/05c-agent.baml baml_src/agent.baml
Verify tests pass npx baml-cli test ## Chapter 6 - Customize Your Prompt with Reasoning In this section, we'll explore how to customize the prompt of the agent with reasoning steps. this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md) there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts) for this section, it will be helpful to leave the baml logs enabled export BAML_LOG=debug update the agent prompt to include a reasoning step ```diff baml_src/agent.baml {{ ctx.output_format }} + + First, always plan out what to do next, for example: + + - ... + - ... + - ... + + {...} // schema "# } @@assert(b, {{this.a == 3}}) } - - ```
skip this step cp ./walkthrough/06-agent.baml baml_src/agent.baml
generate the updated client npx baml-cli generate now, you can try it out with a simple prompt npx tsx src/index.ts 'can you multiply 3 and 4' you should see output from the baml logs showing the reasoning steps #### optional challenge add a field to your tool output format that includes the reasoning steps in the output! ## Chapter 7 - Customize Your Context Window In this section, we'll explore how to customize the context window of the agent. this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md) update the agent to pretty-print the Context window for the model ```diff src/agent.ts // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 - return JSON.stringify(this.events); + return JSON.stringify(this.events, null, 2); } } ```
skip this step cp ./walkthrough/07-agent.ts src/agent.ts
Test the formatting BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' next, let's update the agent to use XML formatting instead this is a very popular format for passing data to a model, among other things, because of the token efficiency of XML. ```diff src/agent.ts serializeForLLM() { - // can change this to whatever custom serialization you want to do, XML, etc - // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 - return JSON.stringify(this.events, null, 2); + return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } + + trimLeadingWhitespace(s: string) { + return s.replace(/^[ \t]+/gm, ''); + } + + serializeOneEvent(e: Event) { + return this.trimLeadingWhitespace(` + <${e.data?.intent || e.type}> + ${ + typeof e.data !== 'object' ? e.data : + Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} + </${e.data?.intent || e.type}> + `) + } } ```
skip this step cp ./walkthrough/07b-agent.ts src/agent.ts
let's try it out BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' lets update our tests to match the new output format ```diff baml_src/agent.baml {{ ctx.output_format }} - First, always plan out what to do next, for example: + Always think about what to do next first, like: - ... args { thread #" - { - "type": "user_input", - "data": "hello!" - } + + hello! + "# } args { thread #" - { - "type": "user_input", - "data": "can you multiply 3 and 4?" - } + + can you multiply 3 and 4? + "# } args { thread #" - [ - { - "type": "user_input", - "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" - }, - { - "type": "tool_call", - "data": { - "intent": "multiply", - "a": 3, - "b": 4 - } - }, - { - "type": "tool_response", - "data": 12 - }, - { - "type": "tool_call", - "data": { - "intent": "divide", - "a": 12, - "b": 2 - } - }, - { - "type": "tool_response", - "data": 6 - }, - { - "type": "tool_call", - "data": { - "intent": "add", - "a": 6, - "b": 12 - } - }, - { - "type": "tool_response", - "data": 18 - } - ] + + can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? + + + + + a: 3 + b: 4 + + + + + 12 + + + + + a: 12 + b: 2 + + + + + 6 + + + + + a: 6 + b: 12 + + + + + 18 + + "# } args { thread #" - [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] + + can you multiply 3 and fe1iiaff10 + "# } args { thread #" - [ - {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, - {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}}, - {"type":"human_response","data":"lets try 12 instead"}, - ] + + can you multiply 3 and FD*(#F&& ? + + + + message: It seems like there was a typo or mistake in your request. 
Could you please clarify or provide the correct numbers you would like to multiply? + + + + lets try 12 instead + "# } @@assert(intent, {{this.intent == "multiply"}}) } ```
skip this step cp ./walkthrough/07c-agent.baml baml_src/agent.baml
check out the updated tests npx baml-cli test ## Chapter 8 - Adding API Endpoints Add an Express server to expose the agent via HTTP. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Install Express and types npm install express && npm install --save-dev @types/express supertest Add the server implementation cp ./walkthrough/08-server.ts src/server.ts
show file ```ts // ./walkthrough/08-server.ts import express from 'express'; import { Thread, agentLoop } from '../src/agent'; const app = express(); app.use(express.json()); app.set('json spaces', 2); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const result = await agentLoop(thread); res.json(result); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { // optional - add state res.status(404).json({ error: "Not implemented yet" }); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ```
Start the server npx tsx src/server.ts Test with curl (in another terminal) curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you add 3 and 4"}' You should get an answer from the agent which includes the agentic trace, ending in a message like: {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."} ## Chapter 9 - In-Memory State and Async Clarification Add state management and async clarification support. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Add some simple in-memory state management for threads cp ./walkthrough/09-state.ts src/state.ts
show file ```ts // ./walkthrough/09-state.ts import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. redis, sqlite, postgres, etc export class ThreadStore { private threads: Map<string, Thread> = new Map(); create(thread: Thread): string { const id = crypto.randomUUID(); this.threads.set(id, thread); return id; } get(id: string): Thread | undefined { return this.threads.get(id); } update(id: string, thread: Thread): void { this.threads.set(id, thread); } } ```
update the server to use the state management * Add thread state management using `ThreadStore` * return thread IDs and response URLs from the /thread endpoint * implement GET /thread/:id * implement POST /thread/:id/response ```diff src/server.ts import express from 'express'; import { Thread, agentLoop } from '../src/agent'; +import { ThreadStore } from '../src/state'; const app = express(); app.set('json spaces', 2); +const store = new ThreadStore(); + // POST /thread - Start new thread app.post('/thread', async (req, res) => { data: req.body.message }]); - const result = await agentLoop(thread); - res.json(result); + + const threadId = store.create(thread); + const newThread = await agentLoop(thread); + + store.update(threadId, newThread); + + const lastEvent = newThread.events[newThread.events.length - 1]; + // If we exited the loop, include the response URL so the client can + // push a new message onto the thread + lastEvent.data.response_url = `/thread/${threadId}/response`; + + console.log("returning last event from endpoint", lastEvent); + + res.json({ + thread_id: threadId, + ...newThread + }); }); app.get('/thread/:id', (req, res) => { - // optional - add state - res.status(404).json({ error: "Not implemented yet" }); + const thread = store.get(req.params.id); + if (!thread) { + return res.status(404).json({ error: "Thread not found" }); + } + res.json(thread); }); +// POST /thread/:id/response - Handle clarification response +app.post('/thread/:id/response', async (req, res) => { + let thread = store.get(req.params.id); + if (!thread) { + return res.status(404).json({ error: "Thread not found" }); + } + + thread.events.push({ + type: "human_response", + data: req.body.message + }); + + // loop until stop event + const newThread = await agentLoop(thread); + + store.update(req.params.id, newThread); + + const lastEvent = newThread.events[newThread.events.length - 1]; + lastEvent.data.response_url = `/thread/${req.params.id}/response`; + + 
console.log("returning last event from endpoint", lastEvent); + + res.json(newThread); +}); + const port = process.env.PORT || 3000; app.listen(port, () => { ```
skip this step cp ./walkthrough/09-server.ts src/server.ts
Start the server npx tsx src/server.ts Test clarification flow curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you multiply 3 and xyz"}' ## Chapter 10 - Adding Human Approval Add support for human approval of operations. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off update the server to handle human approvals * Import `handleNextStep` to execute approved actions * Add two payload types to distinguish approvals from responses * Handle responses and approvals differently in the endpoint * Show better error messages when things go wrong ```diff src/server.ts import express from 'express'; -import { Thread, agentLoop } from '../src/agent'; +import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; }); + +type ApprovalPayload = { + type: "approval"; + approved: boolean; + comment?: string; +} + +type ResponsePayload = { + type: "response"; + response: string; +} + +type Payload = ApprovalPayload | ResponsePayload; + // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { return res.status(404).json({ error: "Thread not found" }); } + + const body: Payload = req.body; + + let lastEvent = thread.events[thread.events.length - 1]; + + if (thread.awaitingHumanResponse() && body.type === 'response') { + thread.events.push({ + type: "human_response", + data: body.response + }); + } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { + // push feedback onto the thread + thread.events.push({ + type: "tool_response", + data: `user denied the operation with feedback: "${body.comment}"` + }); + } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { + // approved, run the tool, pushing results onto the thread + await handleNextStep(lastEvent.data, thread); + } else
{ + res.status(400).json({ + error: "Invalid request: " + body.type, + awaitingHumanResponse: thread.awaitingHumanResponse(), + awaitingHumanApproval: thread.awaitingHumanApproval() + }); + return; + } + - thread.events.push({ - type: "human_response", - data: req.body.message - }); - // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); - const lastEvent = newThread.events[newThread.events.length - 1]; + lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; ```
skip this step cp ./walkthrough/10-server.ts src/server.ts
Add a few methods to the agent to handle approvals and responses ```diff src/agent.ts `) } + + awaitingHumanResponse(): boolean { + const lastEvent = this.events[this.events.length - 1]; + return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent); + } + + awaitingHumanApproval(): boolean { + const lastEvent = this.events[this.events.length - 1]; + return lastEvent.data.intent === 'divide'; + } } // response to human, return the thread return thread; + case "divide": + // divide is scary, return it for human approval + return thread; case "add": case "subtract": case "multiply": - case "divide": thread = await handleNextStep(nextStep, thread); } ```
skip this step cp ./walkthrough/10-agent.ts src/agent.ts
Start the server npx tsx src/server.ts Test division with approval curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you divide 3 by 4"}' You should see: { "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b", "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } } ] } reject the request with another curl call, changing the thread ID curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}' You should see: the last tool call is now `"intent":"divide","a":3,"b":5` { "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } }, { "type": "tool_response", "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\"" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 5, "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response" } } ] } now you can approve the operation curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": true}' you should see the final message includes the tool response and final result! ... { "type": "tool_response", "data": 0.5 }, { "type": "done_for_now", "message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!", "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response" } ## Chapter 11 - Human Approvals over email in this section, we'll add support for human approvals over email. 
This will start a little bit contrived, just to get the concepts down - We'll start by invoking the workflow from the CLI but approvals for `divide` and `request_more_information` will be handled over email, then the final `done_for_now` answer will be printed back to the CLI While contrived, this is a great example of the flexibility you get from [factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md) for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Install HumanLayer npm install humanlayer Update CLI to send `divide` and `request_more_information` to a human via email ```diff src/cli.ts // cli.ts lets you invoke the agent loop from the command line +import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event } from "../src/agent"; - - export async function cli() { // Get command line arguments, skipping the first two (node and script name) // Run the agent loop with the thread - const result = await agentLoop(thread); - let lastEvent = result.events.slice(-1)[0]; + let newThread = await agentLoop(thread); + let lastEvent = newThread.events.slice(-1)[0]; - while (lastEvent.data.intent === "request_more_information") { - const message = await askHuman(lastEvent.data.message); - thread.events.push({ type: "human_response", data: message }); - const result = await agentLoop(thread); - lastEvent = result.events.slice(-1)[0]; + while (lastEvent.data.intent !== "done_for_now") { + const responseEvent = await askHuman(lastEvent); + thread.events.push(responseEvent); + newThread = await agentLoop(thread); + lastEvent = newThread.events.slice(-1)[0]; } // print the final result console.log(lastEvent.data.message); process.exit(0); } -async function askHuman(message: string) { +async function askHuman(lastEvent: Event): Promise { + if (process.env.HUMANLAYER_API_KEY) { + return await 
askHumanEmail(lastEvent); + } else { + return await askHumanCLI(lastEvent.data.message); + } +} + +async function askHumanCLI(message: string): Promise { const readline = require('readline').createInterface({ input: process.stdin, return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { - resolve(answer); + resolve({ type: "human_response", data: answer }); }); }); } + +export async function askHumanEmail(lastEvent: Event): Promise { + if (!process.env.HUMANLAYER_EMAIL) { + throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); + } + const hl = humanlayer({ //reads apiKey from env + // name of this agent + runId: "12fa-cli-agent", + verbose: true, + contactChannel: { + // agent should request permission via email + email: { + address: process.env.HUMANLAYER_EMAIL, + } + } + }) + + if (lastEvent.data.intent === "divide") { + // fetch approval synchronously - this will block until reply + const response = await hl.fetchHumanApproval({ + spec: { + fn: "divide", + kwargs: { + a: lastEvent.data.a, + b: lastEvent.data.b + } + } + }) + + if (response.approved) { + const result = lastEvent.data.a / lastEvent.data.b; + console.log("tool_response", result); + return { + "type": "tool_response", + "data": result + }; + } else { + return { + "type": "tool_response", + "data": `user denied operation ${lastEvent.data.intent} + with feedback: ${response.comment}` + }; + } + } + throw new Error(`unknown tool: ${lastEvent.data.intent}`) +} ```
skip this step cp ./walkthrough/11-cli.ts src/cli.ts
Run the CLI npx tsx src/index.ts 'can you divide 4 by 5' The last line of your program should mention human review step nextStep { intent: 'divide', a: 4, b: 5 } HumanLayer: Requested human approval from HumanLayer cloud go ahead and respond to the email with some feedback: ![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true) you should get another email with an updated attempt based on your feedback! You can go ahead and approve this one: ![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true) and your final output will look like nextStep { intent: 'done_for_now', message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!' } The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask! lets implement the `request_more_information` flow as well ```diff src/cli.ts }) + if (lastEvent.data.intent === "request_more_information") { + // fetch response synchronously - this will block until reply + const response = await hl.fetchHumanResponse({ + spec: { + msg: lastEvent.data.message + } + }) + return { + "type": "tool_response", + "data": response + } + } + if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply ```
skip this step cp ./walkthrough/11b-cli.ts src/cli.ts
let's test the request_more_information flow by asking for a calculation with garbled input: npx tsx src/index.ts 'can you multiply 4 and xyz' You should get an email with a request for clarification Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else? you can respond with something like use 8 instead of xyz you should see a final result on the CLI like I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32. as a final step, let's explore using a custom html template for the email ```diff src/cli.ts email: { address: process.env.HUMANLAYER_EMAIL, + // custom email body - jinja + template: `{% if type == 'request_more_information' %} +{{ event.spec.msg }} +{% else %} +agent {{ event.run_id }} is requesting approval for {{event.spec.fn}} +with args: {{event.spec.kwargs}} +

+reply to this email to approve +{% endif %}` } } ```
skip this step cp ./walkthrough/11c-cli.ts src/cli.ts
first try with divide: npx tsx src/index.ts 'can you divide 4 by 5' you should see a slightly different email with the custom template ![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true) feel free to run with the flow and then you can try updating the template to your liking (if you're using cursor, something as simple as highlighting the template and asking to "make it better" should do the trick) try triggering "request_more_information" as well! that's it - in the next chapter, we'll build a fully email-driven workflow agent that uses webhooks for human approval ## Chapter XX - HumanLayer Webhook Integration the previous sections used the humanlayer SDK in "synchronous mode" - that means every time we wait for human approval, we sit in a loop polling until the human response is received. That's obviously not ideal, especially for production workloads, so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md) by updating the server to end processing after contacting a human, and use webhooks to receive the results. 
add code to initialize humanlayer in the server ```diff src/server.ts import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; +import { humanlayer } from 'humanlayer'; const app = express(); const store = new ThreadStore(); +const getHumanlayer = () => { + const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; + if (!HUMANLAYER_EMAIL) { + throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); + } + + const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; + if (!HUMANLAYER_API_KEY) { + throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); + } + return humanlayer({ + runId: `12fa-agent`, + contactChannel: { + email: { address: HUMANLAYER_EMAIL } + } + }); +} + // POST /thread - Start new thread app.post('/thread', async (req, res) => { // loop until stop event - const newThread = await agentLoop(thread); + const result = await agentLoop(thread); - store.update(req.params.id, newThread); + store.update(req.params.id, result); - lastEvent = newThread.events[newThread.events.length - 1]; + lastEvent = result.events[result.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); - res.json(newThread); + res.json(result); }); ```
skip this step cp ./walkthrough/12-1-server-init.ts src/server.ts
next, lets update the /thread endpoint to 1. handle requests asynchronously, returning immediately 2. create a human contact on request_more_information and done_for_now calls Update the server to be able to handle request_clarification responses - remove the old /response endpoint and types - update the /thread endpoint to run processing asynchronously, return immediately - send a state.threadId when requesting human responses - add a handleHumanResponse function to process the human response - add a /webhook endpoint to handle the webhook response ```diff src/server.ts -import express from 'express'; +import express, { Request, Response } from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; -import { humanlayer } from 'humanlayer'; +import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer'; const app = express(); }); } - // POST /thread - Start new thread -app.post('/thread', async (req, res) => { +app.post('/thread', async (req: Request, res: Response) => { const thread = new Thread([{ type: "user_input", }]); - const threadId = store.create(thread); - const newThread = await agentLoop(thread); - - store.update(threadId, newThread); + // run agent loop asynchronously, return immediately + Promise.resolve().then(async () => { + const threadId = store.create(thread); + const newThread = await agentLoop(thread); + + store.update(threadId, newThread); - const lastEvent = newThread.events[newThread.events.length - 1]; - // If we exited the loop, include the response URL so the client can - // push a new message onto the thread - lastEvent.data.response_url = `/thread/${threadId}/response`; + const lastEvent = newThread.events[newThread.events.length - 1]; - console.log("returning last event from endpoint", lastEvent); - - res.json({ - thread_id: threadId, - ...newThread + if (thread.awaitingHumanResponse()) { + const hl = getHumanlayer(); + // create a human contact - returns 
immediately + hl.createHumanContact({ + spec: { + msg: lastEvent.data.message, + state: { + thread_id: threadId, + } + } + }); + } }); + + res.json({ status: "processing" }); }); // GET /thread/:id - Get thread status -app.get('/thread/:id', (req, res) => { +app.get('/thread/:id', (req: Request, res: Response) => { const thread = store.get(req.params.id); if (!thread) { }); +type WebhookResponse = V1Beta2HumanContactCompleted; -type ApprovalPayload = { - type: "approval"; - approved: boolean; - comment?: string; -} +const handleHumanResponse = async (req: Request, res: Response) => { -type ResponsePayload = { - type: "response"; - response: string; } -type Payload = ApprovalPayload | ResponsePayload; +app.post('/webhook', async (req: Request, res: Response) => { + console.log("webhook response", req.body); + const response = req.body as WebhookResponse; -// POST /thread/:id/response - Handle clarification response -app.post('/thread/:id/response', async (req, res) => { - let thread = store.get(req.params.id); + // response is guaranteed to be set on a webhook + const humanResponse: string = response.event.status?.response as string; + + const threadId = response.event.spec.state?.thread_id; + if (!threadId) { + return res.status(400).json({ error: "Thread ID not found" }); + } + + const thread = store.get(threadId); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } - const body: Payload = req.body; - - let lastEvent = thread.events[thread.events.length - 1]; - - if (thread.awaitingHumanResponse() && body.type === 'response') { - thread.events.push({ - type: "human_response", - data: body.response - }); - } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { - // push feedback onto the thread - thread.events.push({ - type: "tool_response", - data: `user denied the operation with feedback: "${body.comment}"` - }); - } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { 
- // approved, run the tool, pushing results onto the thread - await handleNextStep(lastEvent.data, thread); - } else { - res.status(400).json({ - error: "Invalid request: " + body.type, - awaitingHumanResponse: thread.awaitingHumanResponse(), - awaitingHumanApproval: thread.awaitingHumanApproval() - }); - return; + if (!thread.awaitingHumanResponse()) { + return res.status(400).json({ error: "Thread is not awaiting human response" }); } - - // loop until stop event - const result = await agentLoop(thread); - - store.update(req.params.id, result); - - lastEvent = result.events[result.events.length - 1]; - lastEvent.data.response_url = `/thread/${req.params.id}/response`; - - console.log("returning last event from endpoint", lastEvent); - - res.json(result); }); ```
skip this step cp ./walkthrough/12a-server.ts src/server.ts
Start the server in another terminal npx tsx src/server.ts now that the server is running, send a payload to the '/thread' endpoint __ do the response step __ now handle approvals for divide __ now also handle done_for_now ================================================ FILE: workshops/2025-05/walkthrough.yaml ================================================ title: "Building the 12-factor agent template from scratch" text: "Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology." targets: - markdown: "./build/walkthrough.md" onChange: diff: true cp: true newFiles: cat: false cp: true - folders: path: "./build/sections" skip: - "cleanup" final: dirName: "final" sections: - name: cleanup title: "Cleanup" text: "Make sure you're starting from a clean slate" steps: - text: "Clean up existing files" command: | rm -rf baml_src/ && rm -rf src/ - name: hello-world title: "Chapter 0 - Hello World" text: "Let's start with a basic TypeScript setup and a hello world program." steps: - text: | This guide is written in TypeScript (yes, a python version is coming soon) There are many checkpoints between the every file edit in theworkshop steps, so even if you aren't super familiar with typescript, you should be able to keep up and run each example. 
To run this guide, you'll need a relatively recent version of nodejs and npm installed You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine command: brew install node@20 results: - text: "You should see the node version" code: | node --version - text: "Copy initial package.json" file: {src: ./walkthrough/00-package.json, dest: package.json} - text: "Install dependencies" command: | npm install incremental: true - text: "Copy tsconfig.json" file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json} - text: "add .gitignore" file: {src: ./walkthrough/00-.gitignore, dest: .gitignore} - text: "Create src folder" dir: {create: true, path: src} - text: "Add a simple hello world index.ts" file: {src: ./walkthrough/00-index.ts, dest: src/index.ts} - text: "Run it to verify" command: | npx tsx src/index.ts results: - text: "You should see:" code: | hello, world! - name: cli-and-agent title: "Chapter 1 - CLI and Agent Loop" text: "Now let's add BAML and create our first agent with a CLI interface." steps: - text: | First, we'll need to install [BAML](https://github.com/boundaryml/baml) which is a tool for prompting and structured outputs. 
command: | npm install @boundaryml/baml incremental: true - text: "Initialize BAML" command: | npx baml-cli init incremental: true - text: "Remove default resume.baml" command: | rm baml_src/resume.baml incremental: true - text: "Add our starter agent, a single baml prompt that we'll build on" file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml} - text: "Generate BAML client code" command: | npx baml-cli generate incremental: true - text: "Enable BAML logging for this section" command: | export BAML_LOG=debug - text: "Add the CLI interface" file: {src: ./walkthrough/01-cli.ts, dest: src/cli.ts} - text: "Update index.ts to use the CLI" file: {src: ./walkthrough/01-index.ts, dest: src/index.ts} - text: "Add the agent implementation" file: {src: ./walkthrough/01-agent.ts, dest: src/agent.ts} - text: | The the BAML code is configured to use OPENAI_API_KEY by default As you're testing, you can change the model / provider to something else as you please client "openai/gpt-4o" [Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms) For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider. If you want to run the example with no changes, you can set the OPENAI_API_KEY env var to any valid openai key. command: | export OPENAI_API_KEY=... - text: "Try it out" command: | npx tsx src/index.ts hello results: - text: you should see a familiar response from the model code: | { intent: 'done_for_now', message: 'Hello! How can I assist you today?' } - name: calculator-tools title: "Chapter 2 - Add Calculator Tools" text: "Let's add some calculator tools to our agent." steps: - text: | Let's start by adding a tool definition for the calculator These are simpile structured outputs that we'll ask the model to return as a "next step" in the agentic loop. 
file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml} - text: | Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml} - text: "Generate updated BAML client" command: | npx baml-cli generate incremental: true - text: "Try out the calculator" command: | npx tsx src/index.ts 'can you add 3 and 4' results: - text: "You should see a tool call to the calculator" code: | { intent: 'add', a: 3, b: 4 } - name: tool-loop title: "Chapter 3 - Process Tool Calls in a Loop" text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM." steps: - text: | First, lets update the agent to handle the tool call file: {src: ./walkthrough/03-agent.ts, dest: src/agent.ts} - text: | Now, lets try it out command: | npx tsx src/index.ts 'can you add 3 and 4' results: - text: you should see the agent call the tool and then return the result code: | { intent: 'done_for_now', message: 'The sum of 3 and 4 is 7.' 
} - text: "For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output" command: | export BAML_LOG=off - text: "Try a multi-step calculation" command: | npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result' - text: "you'll notice that tools like multiply and divide are not available" command: | npx tsx src/index.ts 'can you multiply 3 and 4' - text: | next, let's add handlers for the rest of the calculator tools file: {src: ./walkthrough/03b-agent.ts, dest: src/agent.ts} - text: "Test subtraction" command: | npx tsx src/index.ts 'can you subtract 3 from 4' - text: | now, let's test the multiplication tool command: | npx tsx src/index.ts 'can you multiply 3 and 4' - text: | finally, let's test a more complex calculation with multiple operations command: | npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' - name: baml-tests title: "Chapter 4 - Add Tests to agent.baml" text: "Let's add some tests to our BAML agent." steps: - text: to start, leave the baml logs enabled command: | export BAML_LOG=debug - text: | next, let's add some tests to the agent We'll start with a simple test that checks the agent's ability to handle a basic calculation. file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml} - text: "Run the tests" command: | npx baml-cli test - text: | now, let's improve the test with assertions! Assertions are a great way to make sure the agent is working as expected, and can easily be extended to check for more complex behavior. file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml} - text: "Run the tests" command: | npx baml-cli test - text: | as you add more tests, you can disable the logs to keep the output clean. You may want to turn them on as you iterate on specific tests. 
command: | export BAML_LOG=off - text: | now, let's add some more complex test cases, where we resume from in the middle of an in-progress agentic context window file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml} - text: | let's try to run it command: | npx baml-cli test - name: human-tools title: "Chapter 5 - Multiple Human Tools" text: | In this section, we'll add support for multiple tools that serve to contact humans. steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." command: | export BAML_LOG=off - text: | first, let's add a tool that can request clarification from a human this will be different from the "done_for_now" tool, and can be used to more flexibly handle different types of human interactions in your agent. file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml} - text: | next, let's re-generate the client code NOTE - if you're using the VSCode extension for BAML, the client will be regenerated automatically when you save the file in your editor. 
command: | npx baml-cli generate incremental: true - text: | now, let's update the agent to use the new tool file: {src: ./walkthrough/05-agent.ts, dest: src/agent.ts} - text: | next, let's update the CLI to handle clarification requests by requesting input from the user on the CLI file: {src: ./walkthrough/05-cli.ts, dest: src/cli.ts} - text: | let's try it out command: | npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& ' - text: | next, let's add a test that checks the agent's ability to handle a clarification request file: {src: ./walkthrough/05b-agent.baml, dest: baml_src/agent.baml} - text: | and now we can run the tests again command: | npx baml-cli test - text: | you'll notice the new test passes, but the hello world test fails This is because the agent's default behavior is to return "done_for_now" file: {src: ./walkthrough/05c-agent.baml, dest: baml_src/agent.baml} - text: "Verify tests pass" command: | npx baml-cli test - name: customize-prompt title: "Chapter 6 - Customize Your Prompt with Reasoning" text: | In this section, we'll explore how to customize the prompt of the agent with reasoning steps. 
this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md) there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts) steps: - text: "for this section, it will be helpful to leave the baml logs enabled" command: | export BAML_LOG=debug - text: | update the agent prompt to include a reasoning step file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml} - text: generate the updated client command: | npx baml-cli generate incremental: true - text: | now, you can try it out with a simple prompt command: | npx tsx src/index.ts 'can you multiply 3 and 4' results: - text: you should see output from the baml logs showing the reasoning steps - text: | #### optional challenge add a field to your tool output format that includes the reasoning steps in the output! - name: context-window title: "Chapter 7 - Customize Your Context Window" text: | In this section, we'll explore how to customize the context window of the agent. this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md) steps: - text: | update the agent to pretty-print the Context window for the model file: {src: ./walkthrough/07-agent.ts, dest: src/agent.ts} - text: "Test the formatting" command: | BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' - text: | next, let's update the agent to use XML formatting instead this is a very popular format for passing data to a model, among other things, because of the token efficiency of XML. 
file: {src: ./walkthrough/07b-agent.ts, dest: src/agent.ts} - text: | let's try it out command: | BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' - text: | lets update our tests to match the new output format file: {src: ./walkthrough/07c-agent.baml, dest: baml_src/agent.baml} - text: | check out the updated tests command: | npx baml-cli test - name: api-endpoints title: "Chapter 8 - Adding API Endpoints" text: "Add an Express server to expose the agent via HTTP." steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." command: | export BAML_LOG=off - text: "Install Express and types" command: | npm install express && npm install --save-dev @types/express supertest incremental: true - text: "Add the server implementation" file: {src: ./walkthrough/08-server.ts, dest: src/server.ts} - text: "Start the server" command: | npx tsx src/server.ts - text: "Test with curl (in another terminal)" command: | curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you add 3 and 4"}' results: - text: | You should get an answer from the agent which includes the agentic trace, ending in a message like: code: | {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."} - name: state-management title: "Chapter 9 - In-Memory State and Async Clarification" text: "Add state management and async clarification support." steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." 
command: | export BAML_LOG=off - text: "Add some simple in-memory state management for threads" file: {src: ./walkthrough/09-state.ts, dest: src/state.ts} - text: | update the server to use the state management * Add thread state management using `ThreadStore` * return thread IDs and response URLs from the /thread endpoint * implement GET /thread/:id * implement POST /thread/:id/response file: {src: ./walkthrough/09-server.ts, dest: src/server.ts} - text: "Start the server" command: | npx tsx src/server.ts - text: "Test clarification flow" command: | curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you multiply 3 and xyz"}' - name: human-approval title: "Chapter 10 - Adding Human Approval" text: "Add support for human approval of operations." steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." command: | export BAML_LOG=off - text: | update the server to handle human approvals * Import `handleNextStep` to execute approved actions * Add two payload types to distinguish approvals from responses * Handle responses and approvals differently in the endpoint * Show better error messages when things go wrongs file: {src: ./walkthrough/10-server.ts, dest: src/server.ts} - text: "Add a few methods to the agent to handle approvals and responses" file: {src: ./walkthrough/10-agent.ts, dest: src/agent.ts} - text: "Start the server" command: | npx tsx src/server.ts - text: "Test division with approval" command: | curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you divide 3 by 4"}' results: - text: "You should see:" code: | { "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b", "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } } ] } - 
text: "reject the request with another curl call, changing the thread ID" command: | curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}' results: - text: 'You should see: the last tool call is now `"intent":"divide","a":3,"b":5`' code: | { "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } }, { "type": "tool_response", "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\"" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 5, "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response" } } ] } - text: "now you can approve the operation" command: | curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": true}' results: - text: "you should see the final message includes the tool response and final result!" code: | ... { "type": "tool_response", "data": 0.5 }, { "type": "done_for_now", "message": "I divided 3 by 6 and the result is 0.5. If you have any more operations or queries, feel free to ask!", "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response" } - name: humanlayer-approval title: "Chapter 11 - Human Approvals over email" text: | in this section, we'll add support for human approvals over email. 
This will start a little bit contrived, just to get the concepts down - We'll start by invoking the workflow from the CLI but approvals for `divide` and `request_more_information` will be handled over email, then the final `done_for_now` answer will be printed back to the CLI While contrived, this is a great example of the flexibility you get from [factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md) steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." command: | export BAML_LOG=off - text: "Install HumanLayer" command: | npm install humanlayer incremental: true - text: "Update CLI to send `divide` and `request_more_information` to a human via email" file: {src: ./walkthrough/11-cli.ts, dest: src/cli.ts} - text: "Run the CLI" command: | npx tsx src/index.ts 'can you divide 4 by 5' results: - text: "The last line of your program should mention human review step" code: | nextStep { intent: 'divide', a: 4, b: 5 } HumanLayer: Requested human approval from HumanLayer cloud - text: | go ahead and respond to the email with some feedback: ![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true) - text: | you should get another email with an updated attempt based on your feedback! You can go ahead and approve this one: ![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true) results: - text: and your final output will look like code: | nextStep { intent: 'done_for_now', message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!' } The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask! 
- text: | lets implement the `request_more_information` flow as well file: {src: ./walkthrough/11b-cli.ts, dest: src/cli.ts} - text: | lets test the require_approval flow as by asking for a calculation with garbled input: command: | npx tsx src/index.ts 'can you multiply 4 and xyz' - text: "You should get an email with a request for clarification" command: | Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else? - text: you can response with something like command: | use 8 instead of xyz results: - text: you should see a final result on the CLI like code: | I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32. - text: | as a final step, lets explore using a custom html template for the email file: {src: ./walkthrough/11c-cli.ts, dest: src/cli.ts} - text: | first try with divide: command: | npx tsx src/index.ts 'can you divide 4 by 5' results: - text: | you should see a slightly different email with the custom template ![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true) feel free to run with the flow and then you can try updating the template to your liking (if you're using cursor, something as simple as highlighting the template and asking to "make it better" should do the trick) try triggering "request_more_information" as well! - text: | thats it - in the next chapter, we'll build a fully email-driven workflow agent that uses webhooks for human approval - name: humanlayer-webhook title: "Chapter XX - HumanLayer Webhook Integration" text: | the previous sections used the humanlayer SDK in "synchronous mode" - that means every time we wait for human approval, we sit in a loop polling until the human response if received. 
That's obviously not ideal, especially for production workloads, so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md) by updating the server to end processing after contacting a human, and use webhooks to receive the results. steps: - text: | add code to initialize humanlayer in the server file: {src: ./walkthrough/12-1-server-init.ts, dest: src/server.ts} - text: | next, lets update the /thread endpoint to 1. handle requests asynchronously, returning immediately 2. create a human contact on request_more_information and done_for_now calls # file: {src: } - text: | Update the server to be able to handle request_clarification responses - remove the old /response endpoint and types - update the /thread endpoint to run processing asynchronously, return immediately - send a state.threadId when requesting human responses - add a handleHumanResponse function to process the human response - add a /webhook endpoint to handle the webhook response file: {src: ./walkthrough/12a-server.ts, dest: src/server.ts} - text: "Start the server in another terminal" command: | npx tsx src/server.ts - text: | now that the server is running, send a payload to the '/thread' endpoint - text: __ do the response step - text: __ now handle approvals for divide - text: __ now also handle done_for_now ================================================ FILE: workshops/2025-05-17/.gitignore ================================================ baml_src/*.baml src/*.ts package.json package-lock.json tsconfig.json build/ ================================================ FILE: workshops/2025-05-17/sections/00-hello-world/README.md ================================================ # Chapter 0 - Hello World Let's start with a basic TypeScript setup and a hello world program. 
This guide is written in TypeScript (yes, a python version is coming soon) There are many checkpoints between every file edit in the workshop steps, so even if you aren't super familiar with typescript, you should be able to keep up and run each example. To run this guide, you'll need a relatively recent version of nodejs and npm installed You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine brew install node@20 You should see the node version node --version Copy initial package.json cp ./walkthrough/00-package.json package.json
show file ```json // ./walkthrough/00-package.json { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ```
Install dependencies npm install Copy tsconfig.json cp ./walkthrough/00-tsconfig.json tsconfig.json
show file ```json // ./walkthrough/00-tsconfig.json { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ```
add .gitignore cp ./walkthrough/00-.gitignore .gitignore
show file ```gitignore // ./walkthrough/00-.gitignore baml_client/ node_modules/ ```
Create src folder mkdir -p src Add a simple hello world index.ts cp ./walkthrough/00-index.ts src/index.ts
show file ```ts // ./walkthrough/00-index.ts async function hello(): Promise<void> { console.log('hello, world!') } async function main() { await hello() } main().catch(console.error) ```
Run it to verify npx tsx src/index.ts You should see: hello, world! ================================================ FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-index.ts ================================================ async function hello(): Promise { console.log('hello, world!') } async function main() { await hello() } main().catch(console.error) ================================================ FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05-17/sections/00-hello-world/walkthrough/00-tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05-17/sections/01-cli-and-agent/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: 
workshops/2025-05-17/sections/01-cli-and-agent/README.md ================================================ # Chapter 1 - CLI and Agent Loop Now let's add BAML and create our first agent with a CLI interface. First, we'll need to install [BAML](https://github.com/boundaryml/baml) which is a tool for prompting and structured outputs. npm install @boundaryml/baml Initialize BAML npx baml-cli init Remove default resume.baml rm baml_src/resume.baml Add our starter agent, a single baml prompt that we'll build on cp ./walkthrough/01-agent.baml baml_src/agent.baml
show file ```rust // ./walkthrough/01-agent.baml class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> DoneForNow { client Qwen3 // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended)) prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ```
Generate BAML client code npx baml-cli generate Enable BAML logging for this section export BAML_LOG=debug Add the CLI interface cp ./walkthrough/01-cli.ts src/cli.ts
show file
```ts
// ./walkthrough/01-cli.ts
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

/**
 * Command-line entry point.
 *
 * Joins the command-line arguments into a single message, wraps it in a
 * Thread as one "user_input" event, runs agentLoop on that thread, and logs
 * whatever agentLoop resolves to. When no arguments are given it prints an
 * error and exits with status 1.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}
```
Update index.ts to use the CLI ```diff src/index.ts +import { cli } from "./cli" + async function hello(): Promise { console.log('hello, world!') async function main() { - await hello() + await cli() } ```
skip this step cp ./walkthrough/01-index.ts src/index.ts
Add the agent implementation cp ./walkthrough/01-agent.ts src/agent.ts
show file
```ts
// ./walkthrough/01-agent.ts
import { b } from "../baml_client";

// tool call or a respond to human tool
// (whatever b.DetermineNextStep resolves to)
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

/** One entry in a thread: a type tag plus an arbitrary payload. */
export interface Event {
    type: string
    data: any;
}

/** Ordered list of events that is serialized and handed to the LLM. */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Returns the events as a JSON string for use in the prompt. */
    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        return JSON.stringify(this.events);
    }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
/**
 * Runs a single DetermineNextStep call on the serialized thread.
 *
 * @param thread - the conversation so far
 * @returns the next step chosen by the model, unmodified
 */
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    return nextStep;
}
```
The BAML code is configured to use BASETEN_API_KEY by default. To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co), and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/). ```rust function DetermineNextStep(thread: string) -> DoneForNow { client Qwen3 // ... ``` If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key. If you want to try swapping out the model, you can change the `client` line. [Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms) For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider. For example, to use openai with an OPENAI_API_KEY, you can do: client "openai/gpt-4o" Set your env vars export BASETEN_API_KEY=... export BASETEN_BASE_URL=... Try it out npx tsx src/index.ts hello you should see a familiar response from the model { intent: 'done_for_now', message: 'Hello! How can I assist you today?'
} ================================================ FILE: workshops/2025-05-17/sections/01-cli-and-agent/src/index.ts ================================================ async function hello(): Promise { console.log('hello, world!') } async function main() { await hello() } main().catch(console.error) ================================================ FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> DoneForNow { client Qwen3 // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended)) prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } // right now this just runs one turn with the LLM, but // we'll update this function to handle all the agent logic export async function agentLoop(thread: Thread): Promise { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); return nextStep; } ================================================ FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ================================================ FILE: workshops/2025-05-17/sections/01-cli-and-agent/walkthrough/01-index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/README.md ================================================ # Chapter 2 - 
Add Calculator Tools Let's add some calculator tools to our agent. Let's start by adding a tool definition for the calculator. These are simple structured outputs that we'll ask the model to return as a "next step" in the agentic loop. cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
show file
```rust
// ./walkthrough/02-tool_calculator.baml

// Union of the four calculator tool shapes defined below.
type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool

// Each tool carries a literal `intent` tag that identifies the operation,
// plus two numeric operands `a` and `b`.

class AddTool {
    intent "add"
    a int | float
    b int | float
}

class SubtractTool {
    intent "subtract"
    a int | float
    b int | float
}

class MultiplyTool {
    intent "multiply"
    a int | float
    b int | float
}

class DivideTool {
    intent "divide"
    a int | float
    b int | float
}
```
Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps ```diff baml_src/agent.baml function DetermineNextStep( thread: string -) -> DoneForNow { +) -> CalculatorTools | DoneForNow { client Qwen3 ```
skip this step cp ./walkthrough/02-agent.baml baml_src/agent.baml
Generate updated BAML client npx baml-cli generate Try out the calculator npx tsx src/index.ts 'can you add 3 and 4' You should see a tool call to the calculator { intent: 'add', a: 3, b: 4 } ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/baml_src/agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> DoneForNow { client Qwen3 // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended)) prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" 
} "# } } ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. 
generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.88.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). default_client_mode async } ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/src/agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } // right now this just runs one turn with the LLM, but // we'll update this function to handle all the agent logic export async function agentLoop(thread: Thread): Promise { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); return nextStep; } ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/walkthrough/02-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: 
string ) -> CalculatorTools | DoneForNow { client Qwen3 // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended)) prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05-17/sections/02-calculator-tools/walkthrough/02-tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/README.md ================================================ # Chapter 3 - Process Tool Calls in a Loop Now let's add a real agentic loop that can run the tools and get a final answer from the LLM. 
First, let's update the agent to handle the tool call ```diff src/agent.ts } -// right now this just runs one turn with the LLM, but -// we'll update this function to handle all the agent logic -export async function agentLoop(thread: Thread): Promise<AgentResponse> { - const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); - return nextStep; + + +export async function agentLoop(thread: Thread): Promise<string> { + + while (true) { + const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); + console.log("nextStep", nextStep); + + switch (nextStep.intent) { + case "done_for_now": + // response to human, return the next step object + return nextStep.message; + case "add": + thread.events.push({ + "type": "tool_call", + "data": nextStep + }); + const result = nextStep.a + nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + continue; + default: + throw new Error(`Unknown intent: ${nextStep.intent}`); + } + } } ```
skip this step cp ./walkthrough/03-agent.ts src/agent.ts
Now, let's try it out npx tsx src/index.ts 'can you add 3 and 4' you should see the agent call the tool and then return the result { intent: 'done_for_now', message: 'The sum of 3 and 4 is 7.' } For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output export BAML_LOG=off Try a multi-step calculation npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result' you'll notice that tools like multiply and divide are not available npx tsx src/index.ts 'can you multiply 3 and 4' next, let's add handlers for the rest of the calculator tools ```diff src/agent.ts -import { b } from "../baml_client"; +import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; -// tool call or a respond to human tool -type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>; - export interface Event { type: string } +export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; +export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> { + let result: number; + switch (nextStep.intent) { + case "add": + result = nextStep.a + nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "subtract": + result = nextStep.a - nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "multiply": + result = nextStep.a * nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "divide": + result = nextStep.a / nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + } +} export async function agentLoop(thread: Thread): Promise<string> { console.log("nextStep", nextStep); + thread.events.push({ + "type": "tool_call", + "data": nextStep + }); + switch
(nextStep.intent) { case "done_for_now": return nextStep.message; case "add": - thread.events.push({ - "type": "tool_call", - "data": nextStep - }); - const result = nextStep.a + nextStep.b; - console.log("tool_response", result); - thread.events.push({ - "type": "tool_response", - "data": result - }); - continue; - default: - throw new Error(`Unknown intent: ${nextStep.intent}`); + case "subtract": + case "multiply": + case "divide": + thread = await handleNextStep(nextStep, thread); } } ```
skip this step cp ./walkthrough/03b-agent.ts src/agent.ts
Test subtraction npx tsx src/index.ts 'can you subtract 3 from 4' now, let's test the multiplication tool npx tsx src/index.ts 'can you multiply 3 and 4' finally, let's test a more complex calculation with multiple operations npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' congratulations, you've taken your first step into hand-rolling an agent loop. from here, we're going to start incorporating some more intermediate and advanced concepts for 12-factor agents. ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/baml_src/agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client Qwen3 // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended)) prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!"
} "# } } ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/baml_src/clients.baml ================================================ // Learn more about clients at https://docs.boundaryml.com/docs/snippets/clients/overview client CustomGPT4o { provider openai options { model "gpt-4o" api_key env.OPENAI_API_KEY } } client CustomGPT4oMini { provider openai retry_policy Exponential options { model "gpt-4o-mini" api_key env.OPENAI_API_KEY } } client CustomSonnet { provider anthropic options { model "claude-3-5-sonnet-20241022" api_key env.ANTHROPIC_API_KEY } } client CustomHaiku { provider anthropic retry_policy Constant options { model "claude-3-haiku-20240307" api_key env.ANTHROPIC_API_KEY } } // https://docs.boundaryml.com/docs/snippets/clients/round-robin client CustomFast { provider round-robin options { // This will alternate between the two clients strategy [CustomGPT4oMini, CustomHaiku] } } // https://docs.boundaryml.com/docs/snippets/clients/fallback client OpenaiFallback { provider fallback options { // This will try the clients in order until one succeeds strategy [CustomGPT4oMini, CustomGPT4oMini] } } // https://docs.boundaryml.com/docs/snippets/clients/retry retry_policy Constant { max_retries 3 // Strategy is optional strategy { type constant_delay delay_ms 200 } } retry_policy Exponential { max_retries 2 // Strategy is optional strategy { type exponential_backoff delay_ms 300 multiplier 1.5 max_delay_ms 10000 } } ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/baml_src/generators.baml ================================================ // This helps use auto generate libraries you can use in the language of // your choice. You can have multiple generators if you use multiple languages. // Just ensure that the output_dir is different for each generator. 
generator target { // Valid values: "python/pydantic", "typescript", "ruby/sorbet", "rest/openapi" output_type "typescript" // Where the generated code will be saved (relative to baml_src/) output_dir "../" // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml). // The BAML VSCode extension version should also match this version. version "0.88.0" // Valid values: "sync", "async" // This controls what `b.FunctionName()` will be (sync or async). default_client_mode async } ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/baml_src/tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/src/agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } // right now this just runs one turn with the LLM, but // we'll update this function to handle all the agent logic export async function agentLoop(thread: Thread): Promise { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); return nextStep; } ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/src/cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/src/index.ts ================================================ import { cli } from "./cli" async function hello(): Promise { console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/walkthrough/03-agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) 
{ this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); switch (nextStep.intent) { case "done_for_now": // response to human, return the next step object return nextStep.message; case "add": thread.events.push({ "type": "tool_call", "data": nextStep }); const result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); continue; default: throw new Error(`Unknown intent: ${nextStep.intent}`); } } } ================================================ FILE: workshops/2025-05-17/sections/03-tool-loop/walkthrough/03b-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": // response to human, return the next step object return nextStep.message; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05-17/walkthrough/00-.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-05-17/walkthrough/00-index.ts ================================================ async function hello(): Promise { console.log('hello, world!') } async function main() { await hello() } main().catch(console.error) 
================================================ FILE: workshops/2025-05-17/walkthrough/00-package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-05-17/walkthrough/00-tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ================================================ FILE: workshops/2025-05-17/walkthrough/01-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> DoneForNow { client Qwen3 // client "openai/gpt-4o" // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended)) prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" 
}
    "#
  }
}


================================================
FILE: workshops/2025-05-17/walkthrough/01-agent.ts
================================================
import { b } from "../baml_client";

// tool call or a respond to human tool
type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>;

export interface Event {
    type: string
    data: any;
}

// A thread is the ordered list of events that is handed to the model.
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105
        return JSON.stringify(this.events);
    }
}

// right now this just runs one turn with the LLM, but
// we'll update this function to handle all the agent logic
export async function agentLoop(thread: Thread): Promise<AgentResponse> {
    const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
    return nextStep;
}


================================================
FILE: workshops/2025-05-17/walkthrough/01-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "./agent";

export async function cli(): Promise<void> {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    console.log(result);
}


================================================
FILE: workshops/2025-05-17/walkthrough/01-index.ts
================================================
import { cli } from "./cli"

async function hello(): Promise<void> {
console.log('hello, world!') } async function main() { await cli() } main().catch(console.error) ================================================ FILE: workshops/2025-05-17/walkthrough/02-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client Qwen3 // client "openai/gpt-4o" // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended)) prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ================================================ FILE: workshops/2025-05-17/walkthrough/02-tool_calculator.baml ================================================ type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ================================================ FILE: workshops/2025-05-17/walkthrough/03-agent.ts ================================================ import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, 
etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); switch (nextStep.intent) { case "done_for_now": // response to human, return the next step object return nextStep.message; case "add": thread.events.push({ "type": "tool_call", "data": nextStep }); const result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); continue; default: throw new Error(`Unknown intent: ${nextStep.intent}`); } } } ================================================ FILE: workshops/2025-05-17/walkthrough/03b-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": // response to human, return the next step object return nextStep.message; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05-17/walkthrough/04-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client Qwen3 // client "openai/gpt-4o" prompt #" {{ 
_.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } } ================================================ FILE: workshops/2025-05-17/walkthrough/04b-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client Qwen3 // client "openai/gpt-4o" prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(hello, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" 
} "# } @@assert(math_operation, {{this.intent == "multiply"}}) } ================================================ FILE: workshops/2025-05-17/walkthrough/04c-agent.baml ================================================ class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client Qwen3 // client "openai/gpt-4o" prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" 
}, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } ================================================ FILE: workshops/2025-05-17/walkthrough/05-agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client Qwen3 // client "openai/gpt-4o" prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "done_for_now"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" 
}, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } ================================================ FILE: workshops/2025-05-17/walkthrough/05-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export 
async function agentLoop(thread: Thread): Promise<Thread> {
    // Keeps asking the model for the next step. Every step is appended to the
    // thread as a "tool_call" event; the loop stops as soon as the model
    // addresses the human.
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05-17/walkthrough/05-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { agentLoop, Thread, Event } from "../src/agent";

export async function cli(): Promise<void> {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    const result = await agentLoop(thread);
    let lastEvent = result.events.slice(-1)[0];

    // as long as the agent asks for clarification, relay the question to the
    // terminal, record the answer on the thread and run the loop again
    while (lastEvent.data.intent === "request_more_information") {
        const answer = await askHuman(lastEvent.data.message);
        thread.events.push({ type: "human_response", data: answer });
        const nextResult = await agentLoop(thread);
        lastEvent = nextResult.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Prints `message` on stdout and resolves with the line typed on stdin.
 *
 * @param message the question to show to the human
 * @returns the answer entered by the human
 */
async function askHuman(message: string): Promise<string> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise<string>((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // release stdin; cli() calls askHuman once per clarification round and
            // an interface that is never closed keeps its listeners attached
            readline.close();
            resolve(answer);
        });
    });
}


================================================
FILE: workshops/2025-05-17/walkthrough/05b-agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done.
  "#)
}

client Qwen3 {
  provider "openai-generic"
  options {
    base_url env.BASETEN_BASE_URL
    api_key env.BASETEN_API_KEY
  }
}

function DetermineNextStep(
    thread: string
) -> HumanTools | CalculatorTools {
    client Qwen3

    // client "openai/gpt-4o"

    prompt #"
        {{ _.role("system") }}

        /nothink

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}

test HelloWorld {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "hello!"
      }
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
}

test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
}

test LongMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {
          "type": "user_input",
          "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?"
},
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add",
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}

test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// after the human clarified the second operand, the agent should go on to multiply 3 by 12
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // each assert is labelled after the field it checks
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}


================================================
FILE: workshops/2025-05-17/walkthrough/05c-agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

class ClarificationRequest {
  intent "request_more_information" @description("you can request more information from me")
  message string
}

class DoneForNow {
  intent "done_for_now"

  message string @description(#"
    message to send to the user about the work that was done.
"#) } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client Qwen3 // client "openai/gpt-4o" prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" }, { "type": "tool_call", "data": { "intent": "multiply", "a": 3, "b": 4 } }, { "type": "tool_response", "data": 12 }, { "type": "tool_call", "data": { "intent": "divide", "a": 12, "b": 2 } }, { "type": "tool_response", "data": 6 }, { "type": "tool_call", "data": { "intent": "add", "a": 6, "b": 12 } }, { "type": "tool_response", "data": 18 } ] "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" [ {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. 
Could you please clarify or provide the correct numbers you would like to multiply?"}}, {"type":"human_response","data":"lets try 12 instead"}, ] "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(a, {{this.b == 12}}) @@assert(b, {{this.a == 3}}) } ================================================ FILE: workshops/2025-05-17/walkthrough/06-agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client Qwen3 // client "openai/gpt-4o" prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} First, always plan out what to do next, for example: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "can you multiply 3 and 4?" } "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" [ { "type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" 
},
        {
          "type": "tool_call",
          "data": {
            "intent": "multiply",
            "a": 3,
            "b": 4
          }
        },
        {
          "type": "tool_response",
          "data": 12
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "divide",
            "a": 12,
            "b": 2
          }
        },
        {
          "type": "tool_response",
          "data": 6
        },
        {
          "type": "tool_call",
          "data": {
            "intent": "add",
            "a": 6,
            "b": 12
          }
        },
        {
          "type": "tool_response",
          "data": 18
        }
      ]
    "#
  }
  @@assert(intent, {{this.intent == "done_for_now"}})
  @@assert(answer, {{"18" in this.message}})
}

test MathOperationWithClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}]
    "#
  }
  @@assert(intent, {{this.intent == "request_more_information"}})
}

// after the human clarified the second operand, the agent should go on to multiply 3 by 12
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // each assert is labelled after the field it checks
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}


================================================
FILE: workshops/2025-05-17/walkthrough/07-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

export interface Event {
    type: string
    data: any;
}

export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    serializeForLLM() {
        // can change this to whatever custom serialization you want to do, XML, etc
        // e.g.
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events, null, 2); } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-05-17/walkthrough/07b-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => 
this.serializeOneEvent(e)).join("\n");
    }

    // strips the spaces and tabs that start each line of `s`
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    // Renders one event as an XML-style element. The tag is the event's
    // intent when its data carries one, otherwise the event type. Primitive
    // data is printed as-is; object data is printed as one "key: value" line
    // per field, leaving out "intent" because it is already the tag name.
    serializeOneEvent(e: Event) {
        return this.trimLeadingWhitespace(`
            <${e.data?.intent || e.type}>
            ${
            typeof e.data !== 'object' ? e.data :
            Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")}
            </${e.data?.intent || e.type}>
        `)
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/**
 * Runs the arithmetic for one calculator tool call, logs the result and
 * appends it to the thread as a "tool_response" event.
 *
 * @param nextStep the calculator tool call selected by the model
 * @param thread   the thread to append the result to (mutated in place)
 * @returns the same thread instance that was passed in
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    // every operation is recorded the same way, so do it in one place
    const record = (result: number): Thread => {
        console.log("tool_response", result);
        thread.events.push({
            "type": "tool_response",
            "data": result
        });
        return thread;
    };

    switch (nextStep.intent) {
        case "add":
            return record(nextStep.a + nextStep.b);
        case "subtract":
            return record(nextStep.a - nextStep.b);
        case "multiply":
            return record(nextStep.a * nextStep.b);
        case "divide":
            return record(nextStep.a / nextStep.b);
    }
}

/**
 * Asks the model for the next step until it addresses the human
 * ("done_for_now" or "request_more_information"). Every step is appended to
 * the thread as a "tool_call" event; calculator steps are executed through
 * handleNextStep before the next turn.
 *
 * @param thread the conversation so far
 * @returns the thread, whose last event is the step addressed to the human
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "add":
            case "subtract":
            case "multiply":
            case "divide":
                thread = await handleNextStep(nextStep, thread);
        }
    }
}


================================================
FILE: workshops/2025-05-17/walkthrough/07c-agent.baml
================================================
// human tools are async requests to a human
type HumanTools = ClarificationRequest | DoneForNow

class
ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client Qwen3 // client "openai/gpt-4o" prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and fe1iiaff10 "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and FD*(#F&& ? message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? 
lets try 12 instead
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // each assert is labelled after the field it checks
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}


================================================
FILE: workshops/2025-05-17/walkthrough/08-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

// POST /thread - Start new thread
app.post('/thread', async (req, res, next) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);
        const result = await agentLoop(thread);
        res.json(result);
    } catch (err) {
        // hand failures of the agent loop to Express' error handling
        // instead of leaving the promise rejection unhandled
        next(err);
    }
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
    // optional - add state
    res.status(404).json({ error: "Not implemented yet" });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };


================================================
FILE: workshops/2025-05-17/walkthrough/09-server.ts
================================================
import express from 'express';
import { Thread, agentLoop } from '../src/agent';
import { ThreadStore } from '../src/state';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// POST /thread - Start new thread
app.post('/thread', async (req, res, next) => {
    try {
        const thread = new Thread([{
            type: "user_input",
            data: req.body.message
        }]);

        const threadId = store.create(thread);
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];
        // If we exited the loop, include the response URL so the client can
        // push a new message onto the thread
        lastEvent.data.response_url = `/thread/${threadId}/response`;

        console.log("returning last event from endpoint", lastEvent);

        res.json({
            thread_id: threadId,
            ...newThread
        });
    } catch (err) {
        // hand failures of the agent loop to Express' error handling
        // instead of leaving the promise rejection unhandled
        next(err);
    }
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } thread.events.push({ type: "human_response", data: req.body.message }); // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(newThread); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05-17/walkthrough/09-state.ts ================================================ import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. 
// redis, sqlite, postgres, etc
/**
 * In-memory thread store keyed by a generated id.
 * Threads live in a Map held by this instance; nothing is persisted.
 */
export class ThreadStore {
    // thread id (UUID string) -> thread
    private threads: Map<string, Thread> = new Map();

    /** Stores `thread` under a freshly generated UUID and returns that id. */
    create(thread: Thread): string {
        const id = crypto.randomUUID();
        this.threads.set(id, thread);
        return id;
    }

    /** Returns the thread stored under `id`, or undefined when there is none. */
    get(id: string): Thread | undefined {
        return this.threads.get(id);
    }

    /** Inserts or replaces the thread stored under `id`. */
    update(id: string, thread: Thread): void {
        this.threads.set(id, thread);
    }
}

================================================
FILE: workshops/2025-05-17/walkthrough/10-agent.ts
================================================
import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client";

/** One entry in a thread: a type label plus an arbitrary payload. */
export interface Event {
    type: string;
    data: any;
}

/** An ordered list of events, plus helpers to render it as LLM prompt text. */
export class Thread {
    events: Event[] = [];

    constructor(events: Event[]) {
        this.events = events;
    }

    /** Renders every event with serializeOneEvent and joins them with newlines. */
    serializeForLLM() {
        return this.events.map(e => this.serializeOneEvent(e)).join("\n");
    }

    /** Strips leading spaces and tabs from every line of `s`. */
    trimLeadingWhitespace(s: string) {
        return s.replace(/^[ \t]+/gm, '');
    }

    /**
     * Renders one event as a tagged block. The tag is the payload's `intent`
     * when present, otherwise the event type. Object payloads are listed as
     * `key: value` lines (the `intent` key is omitted because it is already
     * the tag); any other payload is interpolated as-is.
     */
    serializeOneEvent(e: Event) {
        const tag = e.data?.intent || e.type;
        // typeof null is 'object', so null must be excluded before Object.keys
        const isPlainValue = e.data === null || typeof e.data !== 'object';
        const body = isPlainValue
            ? e.data
            : Object.keys(e.data)
                .filter(k => k !== 'intent')
                .map(k => `${k}: ${e.data[k]}`)
                .join("\n");
        // close the tag so consecutive events stay clearly delimited in the prompt
        return this.trimLeadingWhitespace(`
            <${tag}>
            ${body}
            </${tag}>
        `);
    }

    /**
     * True when the last event's intent is "request_more_information" or
     * "done_for_now". False for an empty thread.
     */
    awaitingHumanResponse(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return ['request_more_information', 'done_for_now'].includes(lastEvent?.data?.intent);
    }

    /** True when the last event's intent is "divide". False for an empty thread. */
    awaitingHumanApproval(): boolean {
        const lastEvent = this.events[this.events.length - 1];
        return lastEvent?.data?.intent === 'divide';
    }
}

export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool;

/** Evaluates a calculator tool call; throws on an intent it does not know. */
function calculate(step: CalculatorTool): number {
    switch (step.intent) {
        case "add":
            return step.a + step.b;
        case "subtract":
            return step.a - step.b;
        case "multiply":
            return step.a * step.b;
        case "divide":
            return step.a / step.b;
        default:
            // reachable only when a caller passes untyped data (e.g. an event payload)
            throw new Error(`unknown tool: ${String((step as { intent?: unknown }).intent)}`);
    }
}

/**
 * Runs the calculator tool described by `nextStep`, logs the result and
 * appends it to `thread` as a "tool_response" event.
 *
 * @returns the same `thread` instance, mutated in place
 * @throws Error when `nextStep.intent` is not add/subtract/multiply/divide
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    const result = calculate(nextStep);
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}

/**
 * Repeatedly asks the model for the next step and records it on the thread as
 * a "tool_call" event. add/subtract/multiply are executed immediately and the
 * loop continues; the loop returns the thread when the step is addressed to a
 * human (done_for_now, request_more_information) or needs approval (divide).
 */
export async function agentLoop(thread: Thread): Promise<Thread> {
    while (true) {
        const nextStep = await b.DetermineNextStep(thread.serializeForLLM());
        console.log("nextStep", nextStep);

        thread.events.push({
            "type": "tool_call",
            "data": nextStep
        });

        switch (nextStep.intent) {
            case "done_for_now":
            case "request_more_information":
                // response to human, return the thread
                return thread;
            case "divide":
                // divide is scary, return it for human approval
                return thread;
            case "add":
            case "subtract":
            case "multiply":
                thread = await handleNextStep(nextStep, thread);
                break;
        }
    }
}

================================================
FILE: workshops/2025-05-17/walkthrough/10-server.ts
================================================ import express from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type ApprovalPayload = { type: "approval"; approved: boolean; comment?: string; } type ResponsePayload = { type: "response"; response: string; } type Payload = ApprovalPayload | ResponsePayload; // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } const body: Payload = req.body; let lastEvent = thread.events[thread.events.length - 1]; if (thread.awaitingHumanResponse() && body.type === 'response') { thread.events.push({ type: "human_response", data: body.response }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { // push feedback onto the thread thread.events.push({ type: "tool_response", data: `user denied the operation with 
feedback: "${body.comment}"` }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { // approved, run the tool, pushing results onto the thread await handleNextStep(lastEvent.data, thread); } else { res.status(400).json({ error: "Invalid request: " + body.type, awaitingHumanResponse: thread.awaitingHumanResponse(), awaitingHumanApproval: thread.awaitingHumanApproval() }); return; } // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(newThread); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05-17/walkthrough/11-cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread let newThread = await agentLoop(thread); let lastEvent = newThread.events.slice(-1)[0]; while (lastEvent.data.intent !== "done_for_now") { const responseEvent = await askHuman(lastEvent); thread.events.push(responseEvent); newThread = await agentLoop(thread); lastEvent = newThread.events.slice(-1)[0]; 
}

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Gets a human reply to `lastEvent`: by email through HumanLayer when
 * HUMANLAYER_API_KEY is set, otherwise by prompting on the terminal.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}

/**
 * Prints `message` on stdout, reads one line from stdin and resolves with it
 * wrapped in a "human_response" event.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // close the interface: this function runs once per prompt, and an
            // unclosed interface keeps its listeners attached to stdin
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Requests human approval by email through HumanLayer for the tool call in
 * `lastEvent`. Only the "divide" intent is handled.
 *
 * @returns a "tool_response" event: the quotient when approved, otherwise a
 *          denial message that includes the reviewer's comment
 * @throws Error when HUMANLAYER_EMAIL is unset, or for any intent other than "divide"
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}

================================================
FILE: workshops/2025-05-17/walkthrough/11b-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line
import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

export async function cli() {
    // Get command line arguments,
// skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        thread.events.push(responseEvent);
        newThread = await agentLoop(thread);
        lastEvent = newThread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/**
 * Gets a human reply to `lastEvent`: by email through HumanLayer when
 * HUMANLAYER_API_KEY is set, otherwise by prompting on the terminal.
 */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}

/**
 * Prints `message` on stdout, reads one line from stdin and resolves with it
 * wrapped in a "human_response" event.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // close the interface: this function runs once per prompt, and an
            // unclosed interface keeps its listeners attached to stdin
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

export async function askHumanEmail(lastEvent: Event): Promise {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        return
{ "type": "tool_response", "data": response } } if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply const response = await hl.fetchHumanApproval({ spec: { fn: "divide", kwargs: { a: lastEvent.data.a, b: lastEvent.data.b } } }) if (response.approved) { const result = lastEvent.data.a / lastEvent.data.b; console.log("tool_response", result); return { "type": "tool_response", "data": result }; } else { return { "type": "tool_response", "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}` }; } } throw new Error(`unknown tool: ${lastEvent.data.intent}`) } ================================================ FILE: workshops/2025-05-17/walkthrough/11c-cli.ts ================================================ // cli.ts lets you invoke the agent loop from the command line import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event } from "../src/agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread let newThread = await agentLoop(thread); let lastEvent = newThread.events.slice(-1)[0]; while (lastEvent.data.intent !== "done_for_now") { const responseEvent = await askHuman(lastEvent); thread.events.push(responseEvent); newThread = await agentLoop(thread); lastEvent = newThread.events.slice(-1)[0]; } // print the final result // optional - you could loop here too console.log(lastEvent.data.message); process.exit(0); } async function askHuman(lastEvent: Event): Promise { if (process.env.HUMANLAYER_API_KEY) { return await 
askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}

/**
 * Prints `message` on stdout, reads one line from stdin and resolves with it
 * wrapped in a "human_response" event.
 */
async function askHumanCLI(message: string): Promise<Event> {
    const rl = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        rl.question(`${message}\n> `, (answer: string) => {
            // close the interface: this function runs once per prompt, and an
            // unclosed interface keeps its listeners attached to stdin
            rl.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

export async function askHumanEmail(lastEvent: Event): Promise {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %} {{ event.spec.msg }} {% else %} agent {{ event.run_id }} is requesting approval for {{event.spec.fn}} with args: {{event.spec.kwargs}}

reply to this email to approve {% endif %}` } } }) if (lastEvent.data.intent === "request_more_information") { // fetch response synchronously - this will block until reply const response = await hl.fetchHumanResponse({ spec: { msg: lastEvent.data.message } }) return { "type": "tool_response", "data": response } } if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply const response = await hl.fetchHumanApproval({ spec: { fn: "divide", kwargs: { a: lastEvent.data.a, b: lastEvent.data.b } } }) if (response.approved) { const result = lastEvent.data.a / lastEvent.data.b; console.log("tool_response", result); return { "type": "tool_response", "data": result }; } else { return { "type": "tool_response", "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}` }; } } throw new Error(`unknown tool: ${lastEvent.data.intent}`) } ================================================ FILE: workshops/2025-05-17/walkthrough/12-1-server-init.ts ================================================ import express from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; import { humanlayer } from 'humanlayer'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); const getHumanlayer = () => { const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; if (!HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; if (!HUMANLAYER_API_KEY) { throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); } return humanlayer({ runId: `12fa-agent`, contactChannel: { email: { address: HUMANLAYER_EMAIL } } }); } // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); 
const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type ApprovalPayload = { type: "approval"; approved: boolean; comment?: string; } type ResponsePayload = { type: "response"; response: string; } type Payload = ApprovalPayload | ResponsePayload; // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } const body: Payload = req.body; let lastEvent = thread.events[thread.events.length - 1]; if (thread.awaitingHumanResponse() && body.type === 'response') { thread.events.push({ type: "human_response", data: body.response }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { // push feedback onto the thread thread.events.push({ type: "tool_response", data: `user denied the operation with feedback: "${body.comment}"` }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { // approved, run the tool, pushing results onto the thread await handleNextStep(lastEvent.data, thread); } else { res.status(400).json({ error: "Invalid request: " + body.type, awaitingHumanResponse: thread.awaitingHumanResponse(), awaitingHumanApproval: thread.awaitingHumanApproval() }); return; } // loop until stop event const result = await agentLoop(thread); 
store.update(req.params.id, result); lastEvent = result.events[result.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(result); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05-17/walkthrough/12-server.ts ================================================ import express, { Request, Response } from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); const getHumanlayer = () => { const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; if (!HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; if (!HUMANLAYER_API_KEY) { throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); } return humanlayer({ runId: `12fa-agent`, contactChannel: { email: { address: HUMANLAYER_EMAIL } } }); } // POST /thread - Start new thread app.post('/thread', async (req: Request, res: Response) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); // run agent loop asynchronously, return immediately Promise.resolve().then(async () => { const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; if (thread.awaitingHumanResponse()) { const hl = getHumanlayer(); // create a human contact - returns immediately hl.createHumanContact({ spec: { msg: lastEvent.data.message, state: { thread_id: threadId, } } }); } }); res.json({ status: 
"processing" }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req: Request, res: Response) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type WebhookResponse = V1Beta2HumanContactCompleted; const handleHumanResponse = async (req: Request, res: Response) => { } app.post('/webhook', async (req: Request, res: Response) => { console.log("webhook response", req.body); const response = req.body as WebhookResponse; // response is guaranteed to be set on a webhook const humanResponse: string = response.event.status?.response as string; const threadId = response.event.spec.state?.thread_id; if (!threadId) { return res.status(400).json({ error: "Thread ID not found" }); } const thread = store.get(threadId); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } if (!thread.awaitingHumanResponse()) { return res.status(400).json({ error: "Thread is not awaiting human response" }); } }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05-17/walkthrough/12a-server.ts ================================================ import express, { Request, Response } from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); const getHumanlayer = () => { const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; if (!HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; if (!HUMANLAYER_API_KEY) { throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); } 
return humanlayer({ runId: `12fa-agent`, contactChannel: { email: { address: HUMANLAYER_EMAIL } } }); } // POST /thread - Start new thread app.post('/thread', async (req: Request, res: Response) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); // run agent loop asynchronously, return immediately Promise.resolve().then(async () => { const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; if (thread.awaitingHumanResponse()) { const hl = getHumanlayer(); // create a human contact - returns immediately hl.createHumanContact({ spec: { msg: lastEvent.data.message, state: { thread_id: threadId, } } }); } }); res.json({ status: "processing" }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req: Request, res: Response) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type WebhookResponse = V1Beta2HumanContactCompleted; const handleHumanResponse = async (req: Request, res: Response) => { } app.post('/webhook', async (req: Request, res: Response) => { console.log("webhook response", req.body); const response = req.body as WebhookResponse; // response is guaranteed to be set on a webhook const humanResponse: string = response.event.status?.response as string; const threadId = response.event.spec.state?.thread_id; if (!threadId) { return res.status(400).json({ error: "Thread ID not found" }); } const thread = store.get(threadId); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } if (!thread.awaitingHumanResponse()) { return res.status(400).json({ error: "Thread is not awaiting human response" }); } }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ 
FILE: workshops/2025-05-17/walkthrough/12aa-server.ts ================================================ import express, { Request, Response } from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); const getHumanlayer = () => { const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; if (!HUMANLAYER_EMAIL) { throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); } const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; if (!HUMANLAYER_API_KEY) { throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); } return humanlayer({ runId: `12fa-agent`, contactChannel: { email: { address: HUMANLAYER_EMAIL } } }); } // POST /thread - Start new thread app.post('/thread', async (req: Request, res: Response) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); // run agent loop asynchronously, return immediately Promise.resolve().then(async () => { const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; if (thread.awaitingHumanResponse()) { const hl = getHumanlayer(); // create a human contact - returns immediately hl.createHumanContact({ spec: { msg: lastEvent.data.message, state: { thread_id: threadId, } } }); } }); res.json({ status: "processing" }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req: Request, res: Response) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type WebhookResponse = V1Beta2HumanContactCompleted; const handleHumanResponse = async (req: Request, res: Response) => { } app.post('/webhook', async (req: Request, res: 
Response) => { console.log("webhook response", req.body); const response = req.body as WebhookResponse; // response is guaranteed to be set on a webhook const humanResponse: string = response.event.status?.response as string; const threadId = response.event.spec.state?.thread_id; if (!threadId) { return res.status(400).json({ error: "Thread ID not found" }); } const thread = store.get(threadId); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } if (!thread.awaitingHumanResponse()) { return res.status(400).json({ error: "Thread is not awaiting human response" }); } }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05-17/walkthrough/12b-server.ts ================================================ import express from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; import { V1Beta2EmailEventReceived, V1Beta2FunctionCallCompleted, V1Beta2HumanContactCompleted } from 'humanlayer'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if 
(!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); type ApprovalPayload = { type: "approval"; approved: boolean; comment?: string; } type ResponsePayload = { type: "response"; response: string; } type Payload = ApprovalPayload | ResponsePayload; // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } const body: Payload = req.body; let lastEvent = thread.events[thread.events.length - 1]; if (thread.awaitingHumanResponse() && body.type === 'response') { thread.events.push({ type: "human_response", data: body.response }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { // push feedback onto the thread thread.events.push({ type: "tool_response", data: `user denied the operation with feedback: "${body.comment}"` }); } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { // approved, run the tool, pushing results onto the thread await handleNextStep(lastEvent.data, thread); } else { res.status(400).json({ error: "Invalid request: " + body.type, awaitingHumanResponse: thread.awaitingHumanResponse(), awaitingHumanApproval: thread.awaitingHumanApproval() }); return; } // loop until stop event const result = await agentLoop(thread); store.update(req.params.id, result); lastEvent = result.events[result.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(result); }); type WebhookResponse = V1Beta2HumanContactCompleted; app.post('/webhook/response', async (req, res) => { console.log("webhook response", req.body); const response = req.body as WebhookResponse; // response is guaranteed to be set on a webhook const humanResponse: string = response.event.status?.response as string; 
const threadId = response.event.spec.state?.thread_id; if (!threadId) { return res.status(400).json({ error: "Thread ID not found" }); } const thread = store.get(threadId); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } if (!thread.awaitingHumanResponse()) { return res.status(400).json({ error: "Thread is not awaiting human response" }); } thread.events.push({ type: "human_response", data: response.event.status?.response }); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-05-17/walkthrough.md ================================================ # Building the 12-factor agent template from scratch Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology. ## Cleanup Make sure you're starting from a clean slate Clean up existing files rm -rf baml_src/ && rm -rf src/ ## Chapter 0 - Hello World Let's start with a basic TypeScript setup and a hello world program. This guide is written in TypeScript (yes, a python version is coming soon). There are many checkpoints between every file edit in the workshop steps, so even if you aren't super familiar with typescript, you should be able to keep up and run each example. To run this guide, you'll need a relatively recent version of nodejs and npm installed You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine brew install node@20 You should see the node version node --version Copy initial package.json cp ./walkthrough/00-package.json package.json
show file ```json // ./walkthrough/00-package.json { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ```
Install dependencies npm install Copy tsconfig.json cp ./walkthrough/00-tsconfig.json tsconfig.json
show file ```json // ./walkthrough/00-tsconfig.json { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { "@/*": ["./*"] } }, "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], "exclude": ["node_modules", "walkthrough"] } ```
add .gitignore cp ./walkthrough/00-.gitignore .gitignore
show file ```gitignore // ./walkthrough/00-.gitignore baml_client/ node_modules/ ```
Create src folder Add a simple hello world index.ts cp ./walkthrough/00-index.ts src/index.ts
show file ```ts // ./walkthrough/00-index.ts async function hello(): Promise<void> { console.log('hello, world!') } async function main() { await hello() } main().catch(console.error) ```
Run it to verify npx tsx src/index.ts You should see: hello, world! ## Chapter 1 - CLI and Agent Loop Now let's add BAML and create our first agent with a CLI interface. First, we'll need to install [BAML](https://github.com/boundaryml/baml) which is a tool for prompting and structured outputs. npm install @boundaryml/baml Initialize BAML npx baml-cli init Remove default resume.baml rm baml_src/resume.baml Add our starter agent, a single baml prompt that we'll build on cp ./walkthrough/01-agent.baml baml_src/agent.baml
show file ```rust // ./walkthrough/01-agent.baml class DoneForNow { intent "done_for_now" message string } client Qwen3 { provider "openai-generic" options { base_url env.BASETEN_BASE_URL api_key env.BASETEN_API_KEY } } function DetermineNextStep( thread: string ) -> DoneForNow { client Qwen3 // client "openai/gpt-4o" // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended)) prompt #" {{ _.role("system") }} /nothink You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} "# } test HelloWorld { functions [DetermineNextStep] args { thread #" { "type": "user_input", "data": "hello!" } "# } } ```
Generate BAML client code npx baml-cli generate Enable BAML logging for this section export BAML_LOG=debug Add the CLI interface cp ./walkthrough/01-cli.ts src/cli.ts
show file ```ts // ./walkthrough/01-cli.ts // cli.ts lets you invoke the agent loop from the command line import { agentLoop, Thread, Event } from "./agent"; export async function cli() { // Get command line arguments, skipping the first two (node and script name) const args = process.argv.slice(2); if (args.length === 0) { console.error("Error: Please provide a message as a command line argument"); process.exit(1); } // Join all arguments into a single message const message = args.join(" "); // Create a new thread with the user's message as the initial event const thread = new Thread([{ type: "user_input", data: message }]); // Run the agent loop with the thread const result = await agentLoop(thread); console.log(result); } ```
Update index.ts to use the CLI ```diff src/index.ts +import { cli } from "./cli" + async function hello(): Promise { console.log('hello, world!') async function main() { - await hello() + await cli() } ```
skip this step cp ./walkthrough/01-index.ts src/index.ts
Add the agent implementation cp ./walkthrough/01-agent.ts src/agent.ts
show file ```ts // ./walkthrough/01-agent.ts import { b } from "../baml_client"; // tool call or a respond to human tool type AgentResponse = Awaited<ReturnType<typeof b.DetermineNextStep>>; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 return JSON.stringify(this.events); } } // right now this just runs one turn with the LLM, but // we'll update this function to handle all the agent logic export async function agentLoop(thread: Thread): Promise<AgentResponse> { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); return nextStep; } ```
The BAML code is configured to use BASETEN_API_KEY by default To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co), and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/). ```rust function DetermineNextStep(thread: string) -> DoneForNow { client Qwen3 // ... ``` If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key. If you want to try swapping out the model, you can change the `client` line. [Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms) For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider. For example, to use openai with an OPENAI_API_KEY, you can do: client "openai/gpt-4o" Set your env vars export BASETEN_API_KEY=... export BASETEN_BASE_URL=... Try it out npx tsx src/index.ts hello you should see a familiar response from the model { intent: 'done_for_now', message: 'Hello! How can I assist you today?' } ## Chapter 2 - Add Calculator Tools Let's add some calculator tools to our agent. Let's start by adding a tool definition for the calculator These are simple structured outputs that we'll ask the model to return as a "next step" in the agentic loop. cp ./walkthrough/02-tool_calculator.baml baml_src/tool_calculator.baml
show file ```rust // ./walkthrough/02-tool_calculator.baml type CalculatorTools = AddTool | SubtractTool | MultiplyTool | DivideTool class AddTool { intent "add" a int | float b int | float } class SubtractTool { intent "subtract" a int | float b int | float } class MultiplyTool { intent "multiply" a int | float b int | float } class DivideTool { intent "divide" a int | float b int | float } ```
Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps ```diff baml_src/agent.baml function DetermineNextStep( thread: string -) -> DoneForNow { +) -> CalculatorTools | DoneForNow { client Qwen3 + // client "openai/gpt-4o" ```
skip this step cp ./walkthrough/02-agent.baml baml_src/agent.baml
Generate updated BAML client npx baml-cli generate Try out the calculator npx tsx src/index.ts 'can you add 3 and 4' You should see a tool call to the calculator { intent: 'add', a: 3, b: 4 } ## Chapter 3 - Process Tool Calls in a Loop Now let's add a real agentic loop that can run the tools and get a final answer from the LLM. First, let's update the agent to handle the tool call ```diff src/agent.ts } -// right now this just runs one turn with the LLM, but -// we'll update this function to handle all the agent logic -export async function agentLoop(thread: Thread): Promise<AgentResponse> { - const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); - return nextStep; + + +export async function agentLoop(thread: Thread): Promise<string> { + + while (true) { + const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); + console.log("nextStep", nextStep); + + switch (nextStep.intent) { + case "done_for_now": + // response to human, return the next step object + return nextStep.message; + case "add": + thread.events.push({ + "type": "tool_call", + "data": nextStep + }); + const result = nextStep.a + nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + continue; + default: + throw new Error(`Unknown intent: ${nextStep.intent}`); + } + } } ```
skip this step cp ./walkthrough/03-agent.ts src/agent.ts
Now, lets try it out npx tsx src/index.ts 'can you add 3 and 4' you should see the agent call the tool and then return the result { intent: 'done_for_now', message: 'The sum of 3 and 4 is 7.' } For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output export BAML_LOG=off Try a multi-step calculation npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result' you'll notice that tools like multiply and divide are not available npx tsx src/index.ts 'can you multiply 3 and 4' next, let's add handlers for the rest of the calculator tools ```diff src/agent.ts -import { b } from "../baml_client"; +import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; -// tool call or a respond to human tool -type AgentResponse = Awaited>; - export interface Event { type: string } +export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; +export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { + let result: number; + switch (nextStep.intent) { + case "add": + result = nextStep.a + nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "subtract": + result = nextStep.a - nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "multiply": + result = nextStep.a * nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + case "divide": + result = nextStep.a / nextStep.b; + console.log("tool_response", result); + thread.events.push({ + "type": "tool_response", + "data": result + }); + return thread; + } +} export async function agentLoop(thread: Thread): Promise { console.log("nextStep", nextStep); + thread.events.push({ + "type": "tool_call", + "data": nextStep + }); + switch 
(nextStep.intent) { case "done_for_now": return nextStep.message; case "add": - thread.events.push({ - "type": "tool_call", - "data": nextStep - }); - const result = nextStep.a + nextStep.b; - console.log("tool_response", result); - thread.events.push({ - "type": "tool_response", - "data": result - }); - continue; - default: - throw new Error(`Unknown intent: ${nextStep.intent}`); + case "subtract": + case "multiply": + case "divide": + thread = await handleNextStep(nextStep, thread); } } ```
skip this step cp ./walkthrough/03b-agent.ts src/agent.ts
Test subtraction npx tsx src/index.ts 'can you subtract 3 from 4' now, let's test the multiplication tool npx tsx src/index.ts 'can you multiply 3 and 4' finally, let's test a more complex calculation with multiple operations npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' congratulations, you've taken your first step into hand-rolling an agent loop. from here, we're going to start incorporating some more intermediate and advanced concepts for 12-factor agents. ## Chapter 4 - Add Tests to agent.baml Let's add some tests to our BAML agent. to start, leave the baml logs enabled export BAML_LOG=debug next, let's add some tests to the agent We'll start with a simple test that checks the agent's ability to handle a basic calculation. ```diff baml_src/agent.baml ) -> CalculatorTools | DoneForNow { client Qwen3 - // client "openai/gpt-4o" - // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended)) prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. "# } + +test MathOperation { + functions [DetermineNextStep] + args { + thread #" + { + "type": "user_input", + "data": "can you multiply 3 and 4?" + } + "# + } +} + ```
skip this step cp ./walkthrough/04-agent.baml baml_src/agent.baml
Run the tests npx baml-cli test now, let's improve the test with assertions! Assertions are a great way to make sure the agent is working as expected, and can easily be extended to check for more complex behavior. ```diff baml_src/agent.baml ) -> CalculatorTools | DoneForNow { client Qwen3 prompt #" "# } + @@assert(hello, {{this.intent == "done_for_now"}}) } "# } + @@assert(math_operation, {{this.intent == "multiply"}}) } ```
skip this step cp ./walkthrough/04b-agent.baml baml_src/agent.baml
Run the tests npx baml-cli test as you add more tests, you can disable the logs to keep the output clean. You may want to turn them on as you iterate on specific tests. export BAML_LOG=off now, let's add some more complex test cases, where we resume from in the middle of an in-progress agentic context window ```diff baml_src/agent.baml } } - function DetermineNextStep( thread: string ) -> CalculatorTools | DoneForNow { client Qwen3 + prompt #" {{ _.role("system") }} "# } - @@assert(hello, {{this.intent == "done_for_now"}}) + @@assert(intent, {{this.intent == "done_for_now"}}) } "# } - @@assert(math_operation, {{this.intent == "multiply"}}) + @@assert(intent, {{this.intent == "multiply"}}) } +test LongMath { + functions [DetermineNextStep] + args { + thread #" + [ + { + "type": "user_input", + "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" + }, + { + "type": "tool_call", + "data": { + "intent": "multiply", + "a": 3, + "b": 4 + } + }, + { + "type": "tool_response", + "data": 12 + }, + { + "type": "tool_call", + "data": { + "intent": "divide", + "a": 12, + "b": 2 + } + }, + { + "type": "tool_response", + "data": 6 + }, + { + "type": "tool_call", + "data": { + "intent": "add", + "a": 6, + "b": 12 + } + }, + { + "type": "tool_response", + "data": 18 + } + ] + "# + } + @@assert(intent, {{this.intent == "done_for_now"}}) + @@assert(answer, {{"18" in this.message}}) +} + ```
skip this step cp ./walkthrough/04c-agent.baml baml_src/agent.baml
let's try to run it npx baml-cli test ## Chapter 5 - Multiple Human Tools In this section, we'll add support for multiple tools that serve to contact humans. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off first, let's add a tool that can request clarification from a human this will be different from the "done_for_now" tool, and can be used to more flexibly handle different types of human interactions in your agent. ```diff baml_src/agent.baml +// human tools are async requests to a human +type HumanTools = ClarificationRequest | DoneForNow + +class ClarificationRequest { + intent "request_more_information" @description("you can request more information from me") + message string +} + class DoneForNow { intent "done_for_now" - message string + + message string @description(#" + message to send to the user about the work that was done. + "#) } } } + function DetermineNextStep( thread: string -) -> CalculatorTools | DoneForNow { +) -> HumanTools | CalculatorTools { client Qwen3 } + ```
skip this step cp ./walkthrough/05-agent.baml baml_src/agent.baml
next, let's re-generate the client code NOTE - if you're using the VSCode extension for BAML, the client will be regenerated automatically when you save the file in your editor. npx baml-cli generate now, let's update the agent to use the new tool ```diff src/agent.ts } -export async function agentLoop(thread: Thread): Promise<string> { +export async function agentLoop(thread: Thread): Promise<Thread> { while (true) { switch (nextStep.intent) { case "done_for_now": - // response to human, return the next step object - return nextStep.message; + case "request_more_information": + // response to human, return the thread + return thread; case "add": case "subtract": ```
skip this step cp ./walkthrough/05-agent.ts src/agent.ts
next, let's update the CLI to handle clarification requests by requesting input from the user on the CLI ```diff src/cli.ts // cli.ts lets you invoke the agent loop from the command line -import { agentLoop, Thread, Event } from "./agent"; +import { agentLoop, Thread, Event } from "../src/agent"; + + export async function cli() { // Get command line arguments, skipping the first two (node and script name) // Run the agent loop with the thread const result = await agentLoop(thread); - console.log(result); + let lastEvent = result.events.slice(-1)[0]; + + while (lastEvent.data.intent === "request_more_information") { + const message = await askHuman(lastEvent.data.message); + thread.events.push({ type: "human_response", data: message }); + const result = await agentLoop(thread); + lastEvent = result.events.slice(-1)[0]; + } + + // print the final result + // optional - you could loop here too + console.log(lastEvent.data.message); + process.exit(0); } + +async function askHuman(message: string) { + const readline = require('readline').createInterface({ + input: process.stdin, + output: process.stdout + }); + + return new Promise((resolve) => { + readline.question(`${message}\n> `, (answer: string) => { + resolve(answer); + }); + }); +} ```
skip this step cp ./walkthrough/05-cli.ts src/cli.ts
let's try it out npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& ' next, let's add a test that checks the agent's ability to handle a clarification request ```diff baml_src/agent.baml ) -> HumanTools | CalculatorTools { client Qwen3 - // client "openai/gpt-4o" + +test MathOperationWithClarification { + functions [DetermineNextStep] + args { + thread #" + [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] + "# + } + @@assert(intent, {{this.intent == "request_more_information"}}) +} + +test MathOperationPostClarification { + functions [DetermineNextStep] + args { + thread #" + [ + {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, + {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}}, + {"type":"human_response","data":"lets try 12 instead"}, + ] + "# + } + @@assert(intent, {{this.intent == "multiply"}}) + @@assert(a, {{this.b == 12}}) + @@assert(b, {{this.a == 3}}) +} + + + ```
skip this step cp ./walkthrough/05b-agent.baml baml_src/agent.baml
and now we can run the tests again npx baml-cli test you'll notice the new test passes, but the hello world test fails. This is because that test still expects "done_for_now", while the agent, now that it can ask for clarification, answers a bare greeting with "request_more_information". Update the assertion to match: ```diff baml_src/agent.baml api_key env.BASETEN_API_KEY } function DetermineNextStep( ) -> HumanTools | CalculatorTools { client Qwen3 + // client "openai/gpt-4o" "# } - @@assert(intent, {{this.intent == "done_for_now"}}) + @@assert(intent, {{this.intent == "request_more_information"}}) } ```
skip this step cp ./walkthrough/05c-agent.baml baml_src/agent.baml
Verify tests pass npx baml-cli test ## Chapter 6 - Customize Your Prompt with Reasoning In this section, we'll explore how to customize the prompt of the agent with reasoning steps. this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md) there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts) for this section, it will be helpful to leave the baml logs enabled export BAML_LOG=debug update the agent prompt to include a reasoning step ```diff baml_src/agent.baml api_key env.BASETEN_API_KEY } function DetermineNextStep( {{ ctx.output_format }} + + First, always plan out what to do next, for example: + + - ... + - ... + - ... + + {...} // schema "# } @@assert(b, {{this.a == 3}}) } - - ```
skip this step cp ./walkthrough/06-agent.baml baml_src/agent.baml
generate the updated client npx baml-cli generate now, you can try it out with a simple prompt npx tsx src/index.ts 'can you multiply 3 and 4' you should see output from the baml logs showing the reasoning steps #### optional challenge add a field to your tool output format that includes the reasoning steps in the output! ## Chapter 7 - Customize Your Context Window In this section, we'll explore how to customize the context window of the agent. this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md) update the agent to pretty-print the Context window for the model ```diff src/agent.ts // can change this to whatever custom serialization you want to do, XML, etc // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 - return JSON.stringify(this.events); + return JSON.stringify(this.events, null, 2); } } ```
skip this step cp ./walkthrough/07-agent.ts src/agent.ts
Test the formatting BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' next, let's update the agent to use XML formatting instead this is a very popular format for passing data to a model, among other things, because of the token efficiency of XML. ```diff src/agent.ts serializeForLLM() { - // can change this to whatever custom serialization you want to do, XML, etc - // e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105 - return JSON.stringify(this.events, null, 2); + return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } + + trimLeadingWhitespace(s: string) { + return s.replace(/^[ \t]+/gm, ''); + } + + serializeOneEvent(e: Event) { + return this.trimLeadingWhitespace(` + <${e.data?.intent || e.type}> + ${ + typeof e.data !== 'object' ? e.data : + Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} + </${e.data?.intent || e.type}> + `) + } } ```
skip this step cp ./walkthrough/07b-agent.ts src/agent.ts
let's try it out BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' lets update our tests to match the new output format ```diff baml_src/agent.baml {{ ctx.output_format }} - First, always plan out what to do next, for example: + Always think about what to do next first, like: - ... args { thread #" - { - "type": "user_input", - "data": "hello!" - } + + hello! + "# } args { thread #" - { - "type": "user_input", - "data": "can you multiply 3 and 4?" - } + + can you multiply 3 and 4? + "# } args { thread #" - [ - { - "type": "user_input", - "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result?" - }, - { - "type": "tool_call", - "data": { - "intent": "multiply", - "a": 3, - "b": 4 - } - }, - { - "type": "tool_response", - "data": 12 - }, - { - "type": "tool_call", - "data": { - "intent": "divide", - "a": 12, - "b": 2 - } - }, - { - "type": "tool_response", - "data": 6 - }, - { - "type": "tool_call", - "data": { - "intent": "add", - "a": 6, - "b": 12 - } - }, - { - "type": "tool_response", - "data": 18 - } - ] + + can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? + + + + + a: 3 + b: 4 + + + + + 12 + + + + + a: 12 + b: 2 + + + + + 6 + + + + + a: 6 + b: 12 + + + + + 18 + + "# } args { thread #" - [{"type":"user_input","data":"can you multiply 3 and feee9ff10"}] + + can you multiply 3 and fe1iiaff10 + "# } args { thread #" - [ - {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"}, - {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}}, - {"type":"human_response","data":"lets try 12 instead"}, - ] + + can you multiply 3 and FD*(#F&& ? + + + + message: It seems like there was a typo or mistake in your request. 
Could you please clarify or provide the correct numbers you would like to multiply? + + + + lets try 12 instead + "# } @@assert(intent, {{this.intent == "multiply"}}) } ```
skip this step cp ./walkthrough/07c-agent.baml baml_src/agent.baml
check out the updated tests npx baml-cli test ## Chapter 8 - Adding API Endpoints Add an Express server to expose the agent via HTTP. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Install Express and types npm install express && npm install --save-dev @types/express supertest Add the server implementation cp ./walkthrough/08-server.ts src/server.ts
show file ```ts // ./walkthrough/08-server.ts import express from 'express'; import { Thread, agentLoop } from '../src/agent'; const app = express(); app.use(express.json()); app.set('json spaces', 2); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const result = await agentLoop(thread); res.json(result); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { // optional - add state res.status(404).json({ error: "Not implemented yet" }); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ```
Start the server npx tsx src/server.ts Test with curl (in another terminal) curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you add 3 and 4"}' You should get an answer from the agent which includes the agentic trace, ending in a message like: {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."} ## Chapter 9 - In-Memory State and Async Clarification Add state management and async clarification support. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Add some simple in-memory state management for threads cp ./walkthrough/09-state.ts src/state.ts
show file ```ts // ./walkthrough/09-state.ts import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. redis, sqlite, postgres, etc export class ThreadStore { private threads: Map<string, Thread> = new Map(); create(thread: Thread): string { const id = crypto.randomUUID(); this.threads.set(id, thread); return id; } get(id: string): Thread | undefined { return this.threads.get(id); } update(id: string, thread: Thread): void { this.threads.set(id, thread); } } ```
update the server to use the state management * Add thread state management using `ThreadStore` * return thread IDs and response URLs from the /thread endpoint * implement GET /thread/:id * implement POST /thread/:id/response ```diff src/server.ts import express from 'express'; import { Thread, agentLoop } from '../src/agent'; +import { ThreadStore } from '../src/state'; const app = express(); app.set('json spaces', 2); +const store = new ThreadStore(); + // POST /thread - Start new thread app.post('/thread', async (req, res) => { data: req.body.message }]); - const result = await agentLoop(thread); - res.json(result); + + const threadId = store.create(thread); + const newThread = await agentLoop(thread); + + store.update(threadId, newThread); + + const lastEvent = newThread.events[newThread.events.length - 1]; + // If we exited the loop, include the response URL so the client can + // push a new message onto the thread + lastEvent.data.response_url = `/thread/${threadId}/response`; + + console.log("returning last event from endpoint", lastEvent); + + res.json({ + thread_id: threadId, + ...newThread + }); }); app.get('/thread/:id', (req, res) => { - // optional - add state - res.status(404).json({ error: "Not implemented yet" }); + const thread = store.get(req.params.id); + if (!thread) { + return res.status(404).json({ error: "Thread not found" }); + } + res.json(thread); }); +// POST /thread/:id/response - Handle clarification response +app.post('/thread/:id/response', async (req, res) => { + let thread = store.get(req.params.id); + if (!thread) { + return res.status(404).json({ error: "Thread not found" }); + } + + thread.events.push({ + type: "human_response", + data: req.body.message + }); + + // loop until stop event + const newThread = await agentLoop(thread); + + store.update(req.params.id, newThread); + + const lastEvent = newThread.events[newThread.events.length - 1]; + lastEvent.data.response_url = `/thread/${req.params.id}/response`; + + 
console.log("returning last event from endpoint", lastEvent); + + res.json(newThread); +}); + const port = process.env.PORT || 3000; app.listen(port, () => { ```
skip this step cp ./walkthrough/09-server.ts src/server.ts
Start the server npx tsx src/server.ts Test clarification flow curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you multiply 3 and xyz"}' ## Chapter 10 - Adding Human Approval Add support for human approval of operations. for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off update the server to handle human approvals * Import `handleNextStep` to execute approved actions * Add two payload types to distinguish approvals from responses * Handle responses and approvals differently in the endpoint * Show better error messages when things go wrong ```diff src/server.ts import express from 'express'; -import { Thread, agentLoop } from '../src/agent'; +import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; }); + +type ApprovalPayload = { + type: "approval"; + approved: boolean; + comment?: string; +} + +type ResponsePayload = { + type: "response"; + response: string; +} + +type Payload = ApprovalPayload | ResponsePayload; + // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { return res.status(404).json({ error: "Thread not found" }); } + + const body: Payload = req.body; + + let lastEvent = thread.events[thread.events.length - 1]; + + if (thread.awaitingHumanResponse() && body.type === 'response') { + thread.events.push({ + type: "human_response", + data: body.response + }); + } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { + // push feedback onto the thread + thread.events.push({ + type: "tool_response", + data: `user denied the operation with feedback: "${body.comment}"` + }); + } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { + // approved, run the tool, pushing results onto the thread + await handleNextStep(lastEvent.data, thread); + } else 
{ + res.status(400).json({ + error: "Invalid request: " + body.type, + awaitingHumanResponse: thread.awaitingHumanResponse(), + awaitingHumanApproval: thread.awaitingHumanApproval() + }); + return; + } + - thread.events.push({ - type: "human_response", - data: req.body.message - }); - // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); - const lastEvent = newThread.events[newThread.events.length - 1]; + lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; ```
skip this step cp ./walkthrough/10-server.ts src/server.ts
Add a few methods to the agent to handle approvals and responses ```diff src/agent.ts `) } + + awaitingHumanResponse(): boolean { + const lastEvent = this.events[this.events.length - 1]; + return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent); + } + + awaitingHumanApproval(): boolean { + const lastEvent = this.events[this.events.length - 1]; + return lastEvent.data.intent === 'divide'; + } } // response to human, return the thread return thread; + case "divide": + // divide is scary, return it for human approval + return thread; case "add": case "subtract": case "multiply": - case "divide": thread = await handleNextStep(nextStep, thread); } ```
skip this step cp ./walkthrough/10-agent.ts src/agent.ts
Start the server npx tsx src/server.ts Test division with approval curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you divide 3 by 4"}' You should see: { "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b", "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } } ] } reject the request with another curl call, changing the thread ID curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}' You should see: the last tool call is now `"intent":"divide","a":3,"b":5` { "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } }, { "type": "tool_response", "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\"" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 5, "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response" } } ] } now you can approve the operation curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": true}' you should see the final message includes the tool response and final result! ... { "type": "tool_response", "data": 0.6 }, { "type": "done_for_now", "message": "I divided 3 by 5 and the result is 0.6. If you have any more operations or queries, feel free to ask!", "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response" } ## Chapter 11 - Human Approvals over email in this section, we'll add support for human approvals over email. 
This will start a little bit contrived, just to get the concepts down - We'll start by invoking the workflow from the CLI but approvals for `divide` and `request_more_information` will be handled over email, then the final `done_for_now` answer will be printed back to the CLI While contrived, this is a great example of the flexibility you get from [factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md) for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details. export BAML_LOG=off Install HumanLayer npm install humanlayer Update CLI to send `divide` and `request_more_information` to a human via email ```diff src/cli.ts // cli.ts lets you invoke the agent loop from the command line +import { humanlayer } from "humanlayer"; import { agentLoop, Thread, Event } from "../src/agent"; - - export async function cli() { // Get command line arguments, skipping the first two (node and script name) // Run the agent loop with the thread - const result = await agentLoop(thread); - let lastEvent = result.events.slice(-1)[0]; + let newThread = await agentLoop(thread); + let lastEvent = newThread.events.slice(-1)[0]; - while (lastEvent.data.intent === "request_more_information") { - const message = await askHuman(lastEvent.data.message); - thread.events.push({ type: "human_response", data: message }); - const result = await agentLoop(thread); - lastEvent = result.events.slice(-1)[0]; + while (lastEvent.data.intent !== "done_for_now") { + const responseEvent = await askHuman(lastEvent); + thread.events.push(responseEvent); + newThread = await agentLoop(thread); + lastEvent = newThread.events.slice(-1)[0]; } // print the final result console.log(lastEvent.data.message); process.exit(0); } -async function askHuman(message: string) { +async function askHuman(lastEvent: Event): Promise { + if (process.env.HUMANLAYER_API_KEY) { + return await 
askHumanEmail(lastEvent); + } else { + return await askHumanCLI(lastEvent.data.message); + } +} + +async function askHumanCLI(message: string): Promise { const readline = require('readline').createInterface({ input: process.stdin, return new Promise((resolve) => { readline.question(`${message}\n> `, (answer: string) => { - resolve(answer); + resolve({ type: "human_response", data: answer }); }); }); } + +export async function askHumanEmail(lastEvent: Event): Promise { + if (!process.env.HUMANLAYER_EMAIL) { + throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); + } + const hl = humanlayer({ //reads apiKey from env + // name of this agent + runId: "12fa-cli-agent", + verbose: true, + contactChannel: { + // agent should request permission via email + email: { + address: process.env.HUMANLAYER_EMAIL, + } + } + }) + + if (lastEvent.data.intent === "divide") { + // fetch approval synchronously - this will block until reply + const response = await hl.fetchHumanApproval({ + spec: { + fn: "divide", + kwargs: { + a: lastEvent.data.a, + b: lastEvent.data.b + } + } + }) + + if (response.approved) { + const result = lastEvent.data.a / lastEvent.data.b; + console.log("tool_response", result); + return { + "type": "tool_response", + "data": result + }; + } else { + return { + "type": "tool_response", + "data": `user denied operation ${lastEvent.data.intent} + with feedback: ${response.comment}` + }; + } + } + throw new Error(`unknown tool: ${lastEvent.data.intent}`) +} ```
skip this step cp ./walkthrough/11-cli.ts src/cli.ts
Run the CLI npx tsx src/index.ts 'can you divide 4 by 5' The last line of your program should mention human review step nextStep { intent: 'divide', a: 4, b: 5 } HumanLayer: Requested human approval from HumanLayer cloud go ahead and respond to the email with some feedback: ![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true) you should get another email with an updated attempt based on your feedback! You can go ahead and approve this one: ![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true) and your final output will look like nextStep { intent: 'done_for_now', message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!' } The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask! lets implement the `request_more_information` flow as well ```diff src/cli.ts }) + if (lastEvent.data.intent === "request_more_information") { + // fetch response synchronously - this will block until reply + const response = await hl.fetchHumanResponse({ + spec: { + msg: lastEvent.data.message + } + }) + return { + "type": "tool_response", + "data": response + } + } + if (lastEvent.data.intent === "divide") { // fetch approval synchronously - this will block until reply ```
skip this step cp ./walkthrough/11b-cli.ts src/cli.ts
let's test the request_more_information flow by asking for a calculation with garbled input: npx tsx src/index.ts 'can you multiply 4 and xyz' You should get an email with a request for clarification Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else? you can respond with something like use 8 instead of xyz you should see a final result on the CLI like I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32. as a final step, let's explore using a custom html template for the email ```diff src/cli.ts email: { address: process.env.HUMANLAYER_EMAIL, + // custom email body - jinja + template: `{% if type == 'request_more_information' %} +{{ event.spec.msg }} +{% else %} +agent {{ event.run_id }} is requesting approval for {{event.spec.fn}} +with args: {{event.spec.kwargs}} +

+reply to this email to approve +{% endif %}` } } ```
skip this step cp ./walkthrough/11c-cli.ts src/cli.ts
first try with divide: npx tsx src/index.ts 'can you divide 4 by 5' you should see a slightly different email with the custom template ![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true) feel free to run with the flow and then you can try updating the template to your liking (if you're using cursor, something as simple as highlighting the template and asking to "make it better" should do the trick) try triggering "request_more_information" as well! that's it - in the next chapter, we'll build a fully email-driven workflow agent that uses webhooks for human approval ## Chapter XX - HumanLayer Webhook Integration the previous sections used the humanlayer SDK in "synchronous mode" - that means every time we wait for human approval, we sit in a loop polling until the human response is received. That's obviously not ideal, especially for production workloads, so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md) by updating the server to end processing after contacting a human, and use webhooks to receive the results. 
add code to initialize humanlayer in the server ```diff src/server.ts import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; +import { humanlayer } from 'humanlayer'; const app = express(); const store = new ThreadStore(); +const getHumanlayer = () => { + const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL; + if (!HUMANLAYER_EMAIL) { + throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL"); + } + + const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY; + if (!HUMANLAYER_API_KEY) { + throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY"); + } + return humanlayer({ + runId: `12fa-agent`, + contactChannel: { + email: { address: HUMANLAYER_EMAIL } + } + }); +} + // POST /thread - Start new thread app.post('/thread', async (req, res) => { // loop until stop event - const newThread = await agentLoop(thread); + const result = await agentLoop(thread); - store.update(req.params.id, newThread); + store.update(req.params.id, result); - lastEvent = newThread.events[newThread.events.length - 1]; + lastEvent = result.events[result.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); - res.json(newThread); + res.json(result); }); ```
skip this step cp ./walkthrough/12-1-server-init.ts src/server.ts
next, lets update the /thread endpoint to 1. handle requests asynchronously, returning immediately 2. create a human contact on request_more_information and done_for_now calls Update the server to be able to handle request_clarification responses - remove the old /response endpoint and types - update the /thread endpoint to run processing asynchronously, return immediately - send a state.threadId when requesting human responses - add a handleHumanResponse function to process the human response - add a /webhook endpoint to handle the webhook response ```diff src/server.ts -import express from 'express'; +import express, { Request, Response } from 'express'; import { Thread, agentLoop, handleNextStep } from '../src/agent'; import { ThreadStore } from '../src/state'; -import { humanlayer } from 'humanlayer'; +import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer'; const app = express(); }); } - // POST /thread - Start new thread -app.post('/thread', async (req, res) => { +app.post('/thread', async (req: Request, res: Response) => { const thread = new Thread([{ type: "user_input", }]); - const threadId = store.create(thread); - const newThread = await agentLoop(thread); - - store.update(threadId, newThread); + // run agent loop asynchronously, return immediately + Promise.resolve().then(async () => { + const threadId = store.create(thread); + const newThread = await agentLoop(thread); + + store.update(threadId, newThread); - const lastEvent = newThread.events[newThread.events.length - 1]; - // If we exited the loop, include the response URL so the client can - // push a new message onto the thread - lastEvent.data.response_url = `/thread/${threadId}/response`; + const lastEvent = newThread.events[newThread.events.length - 1]; - console.log("returning last event from endpoint", lastEvent); - - res.json({ - thread_id: threadId, - ...newThread + if (thread.awaitingHumanResponse()) { + const hl = getHumanlayer(); + // create a human contact - returns 
immediately + hl.createHumanContact({ + spec: { + msg: lastEvent.data.message, + state: { + thread_id: threadId, + } + } + }); + } }); + + res.json({ status: "processing" }); }); // GET /thread/:id - Get thread status -app.get('/thread/:id', (req, res) => { +app.get('/thread/:id', (req: Request, res: Response) => { const thread = store.get(req.params.id); if (!thread) { }); +type WebhookResponse = V1Beta2HumanContactCompleted; -type ApprovalPayload = { - type: "approval"; - approved: boolean; - comment?: string; -} +const handleHumanResponse = async (req: Request, res: Response) => { -type ResponsePayload = { - type: "response"; - response: string; } -type Payload = ApprovalPayload | ResponsePayload; +app.post('/webhook', async (req: Request, res: Response) => { + console.log("webhook response", req.body); + const response = req.body as WebhookResponse; -// POST /thread/:id/response - Handle clarification response -app.post('/thread/:id/response', async (req, res) => { - let thread = store.get(req.params.id); + // response is guaranteed to be set on a webhook + const humanResponse: string = response.event.status?.response as string; + + const threadId = response.event.spec.state?.thread_id; + if (!threadId) { + return res.status(400).json({ error: "Thread ID not found" }); + } + + const thread = store.get(threadId); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } - const body: Payload = req.body; - - let lastEvent = thread.events[thread.events.length - 1]; - - if (thread.awaitingHumanResponse() && body.type === 'response') { - thread.events.push({ - type: "human_response", - data: body.response - }); - } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) { - // push feedback onto the thread - thread.events.push({ - type: "tool_response", - data: `user denied the operation with feedback: "${body.comment}"` - }); - } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) { 
- // approved, run the tool, pushing results onto the thread - await handleNextStep(lastEvent.data, thread); - } else { - res.status(400).json({ - error: "Invalid request: " + body.type, - awaitingHumanResponse: thread.awaitingHumanResponse(), - awaitingHumanApproval: thread.awaitingHumanApproval() - }); - return; + if (!thread.awaitingHumanResponse()) { + return res.status(400).json({ error: "Thread is not awaiting human response" }); } - - // loop until stop event - const result = await agentLoop(thread); - - store.update(req.params.id, result); - - lastEvent = result.events[result.events.length - 1]; - lastEvent.data.response_url = `/thread/${req.params.id}/response`; - - console.log("returning last event from endpoint", lastEvent); - - res.json(result); }); ```
skip this step cp ./walkthrough/12a-server.ts src/server.ts
Start the server in another terminal npx tsx src/server.ts now that the server is running, send a payload to the '/thread' endpoint __ do the response step __ now handle approvals for divide __ now also handle done_for_now ================================================ FILE: workshops/2025-05-17/walkthrough.yaml ================================================ title: "Building the 12-factor agent template from scratch" text: "Steps to start from a bare TS repo and build up a 12-factor agent. This walkthrough will guide you through creating a TypeScript agent that follows the 12-factor methodology." targets: - markdown: "./build/walkthrough.md" onChange: diff: true cp: true newFiles: cat: false cp: true - folders: path: "./build/sections" skip: - "cleanup" final: dirName: "final" sections: - name: cleanup title: "Cleanup" text: "Make sure you're starting from a clean slate" steps: - text: "Clean up existing files" command: | rm -rf baml_src/ && rm -rf src/ - name: hello-world title: "Chapter 0 - Hello World" text: "Let's start with a basic TypeScript setup and a hello world program." steps: - text: | This guide is written in TypeScript (yes, a python version is coming soon) There are many checkpoints between every file edit in the workshop steps, so even if you aren't super familiar with typescript, you should be able to keep up and run each example. 
To run this guide, you'll need a relatively recent version of nodejs and npm installed You can use whatever nodejs version manager you want, [homebrew](https://formulae.brew.sh/formula/node) is fine command: brew install node@20 results: - text: "You should see the node version" code: | node --version - text: "Copy initial package.json" file: {src: ./walkthrough/00-package.json, dest: package.json} - text: "Install dependencies" command: | npm install incremental: true - text: "Copy tsconfig.json" file: {src: ./walkthrough/00-tsconfig.json, dest: tsconfig.json} - text: "add .gitignore" file: {src: ./walkthrough/00-.gitignore, dest: .gitignore} - text: "Create src folder" dir: {create: true, path: src} - text: "Add a simple hello world index.ts" file: {src: ./walkthrough/00-index.ts, dest: src/index.ts} - text: "Run it to verify" command: | npx tsx src/index.ts results: - text: "You should see:" code: | hello, world! - name: cli-and-agent title: "Chapter 1 - CLI and Agent Loop" text: "Now let's add BAML and create our first agent with a CLI interface." steps: - text: | First, we'll need to install [BAML](https://github.com/boundaryml/baml) which is a tool for prompting and structured outputs. 
command: | npm install @boundaryml/baml incremental: true - text: "Initialize BAML" command: | npx baml-cli init incremental: true - text: "Remove default resume.baml" command: | rm baml_src/resume.baml incremental: true - text: "Add our starter agent, a single baml prompt that we'll build on" file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml} - text: "Generate BAML client code" command: | npx baml-cli generate incremental: true - text: "Enable BAML logging for this section" command: | export BAML_LOG=debug - text: "Add the CLI interface" file: {src: ./walkthrough/01-cli.ts, dest: src/cli.ts} - text: "Update index.ts to use the CLI" file: {src: ./walkthrough/01-index.ts, dest: src/index.ts} - text: "Add the agent implementation" file: {src: ./walkthrough/01-agent.ts, dest: src/agent.ts} - text: | The BAML code is configured to use BASETEN_API_KEY by default To get a Baseten API key and URL, create an account at [baseten.co](https://baseten.co), and then deploy [Qwen3 32B from the model library](https://www.baseten.co/library/qwen-3-32b/). ```rust function DetermineNextStep(thread: string) -> DoneForNow { client Qwen3 // ... ``` If you want to run the example with no changes, you can set the BASETEN_API_KEY env var to any valid baseten key. If you want to try swapping out the model, you can change the `client` line. [Docs on baml clients can be found here](https://docs.boundaryml.com/guide/baml-basics/switching-llms) For example, you can configure [gemini](https://docs.boundaryml.com/ref/llm-client-providers/google-ai-gemini) or [anthropic](https://docs.boundaryml.com/ref/llm-client-providers/anthropic) as your model provider. For example, to use openai with an OPENAI_API_KEY, you can do: client "openai/gpt-4o" - text: Set your env vars command: | export BASETEN_API_KEY=... export BASETEN_BASE_URL=... 
- text: "Try it out" command: | npx tsx src/index.ts hello results: - text: you should see a familiar response from the model code: | { intent: 'done_for_now', message: 'Hello! How can I assist you today?' } - name: calculator-tools title: "Chapter 2 - Add Calculator Tools" text: "Let's add some calculator tools to our agent." steps: - text: | Let's start by adding a tool definition for the calculator These are simple structured outputs that we'll ask the model to return as a "next step" in the agentic loop. file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml} - text: | Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml} - text: "Generate updated BAML client" command: | npx baml-cli generate incremental: true - text: "Try out the calculator" command: | npx tsx src/index.ts 'can you add 3 and 4' results: - text: "You should see a tool call to the calculator" code: | { intent: 'add', a: 3, b: 4 } - name: tool-loop title: "Chapter 3 - Process Tool Calls in a Loop" text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM." steps: - text: | First, let's update the agent to handle the tool call file: {src: ./walkthrough/03-agent.ts, dest: src/agent.ts} - text: | Now, let's try it out command: | npx tsx src/index.ts 'can you add 3 and 4' results: - text: you should see the agent call the tool and then return the result code: | { intent: 'done_for_now', message: 'The sum of 3 and 4 is 7.' 
} - text: "For the next step, we'll do a more complex calculation, let's turn off the baml logs for more concise output" command: | export BAML_LOG=off - text: "Try a multi-step calculation" command: | npx tsx src/index.ts 'can you add 3 and 4, then add 6 to that result' - text: "you'll notice that tools like multiply and divide are not available" command: | npx tsx src/index.ts 'can you multiply 3 and 4' - text: | next, let's add handlers for the rest of the calculator tools file: {src: ./walkthrough/03b-agent.ts, dest: src/agent.ts} - text: "Test subtraction" command: | npx tsx src/index.ts 'can you subtract 3 from 4' - text: | now, let's test the multiplication tool command: | npx tsx src/index.ts 'can you multiply 3 and 4' - text: | finally, let's test a more complex calculation with multiple operations command: | npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' - text: | congratulations, you've taken your first step into hand-rolling an agent loop. from here, we're going to start incorporating some more intermediate and advanced concepts for 12-factor agents. - name: baml-tests title: "Chapter 4 - Add Tests to agent.baml" text: "Let's add some tests to our BAML agent." steps: - text: to start, leave the baml logs enabled command: | export BAML_LOG=debug - text: | next, let's add some tests to the agent We'll start with a simple test that checks the agent's ability to handle a basic calculation. file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml} - text: "Run the tests" command: | npx baml-cli test - text: | now, let's improve the test with assertions! Assertions are a great way to make sure the agent is working as expected, and can easily be extended to check for more complex behavior. 
file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml} - text: "Run the tests" command: | npx baml-cli test - text: | as you add more tests, you can disable the logs to keep the output clean. You may want to turn them on as you iterate on specific tests. command: | export BAML_LOG=off - text: | now, let's add some more complex test cases, where we resume from in the middle of an in-progress agentic context window file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml} - text: | let's try to run it command: | npx baml-cli test - name: human-tools title: "Chapter 5 - Multiple Human Tools" text: | In this section, we'll add support for multiple tools that serve to contact humans. steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." command: | export BAML_LOG=off - text: | first, let's add a tool that can request clarification from a human this will be different from the "done_for_now" tool, and can be used to more flexibly handle different types of human interactions in your agent. file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml} - text: | next, let's re-generate the client code NOTE - if you're using the VSCode extension for BAML, the client will be regenerated automatically when you save the file in your editor. 
command: | npx baml-cli generate incremental: true - text: | now, let's update the agent to use the new tool file: {src: ./walkthrough/05-agent.ts, dest: src/agent.ts} - text: | next, let's update the CLI to handle clarification requests by requesting input from the user on the CLI file: {src: ./walkthrough/05-cli.ts, dest: src/cli.ts} - text: | let's try it out command: | npx tsx src/index.ts 'can you multiply 3 and FD*(#F&& ' - text: | next, let's add a test that checks the agent's ability to handle a clarification request file: {src: ./walkthrough/05b-agent.baml, dest: baml_src/agent.baml} - text: | and now we can run the tests again command: | npx baml-cli test - text: | you'll notice the new test passes, but the hello world test fails This is because the agent's default behavior is to return "done_for_now" file: {src: ./walkthrough/05c-agent.baml, dest: baml_src/agent.baml} - text: "Verify tests pass" command: | npx baml-cli test - name: customize-prompt title: "Chapter 6 - Customize Your Prompt with Reasoning" text: | In this section, we'll explore how to customize the prompt of the agent with reasoning steps. 
this is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md) there's a deep dive on reasoning on AI That Works [reasoning models versus reasoning steps](https://github.com/hellovai/ai-that-works/tree/main/2025-04-07-reasoning-models-vs-prompts) steps: - text: "for this section, it will be helpful to leave the baml logs enabled" command: | export BAML_LOG=debug - text: | update the agent prompt to include a reasoning step file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml} - text: generate the updated client command: | npx baml-cli generate incremental: true - text: | now, you can try it out with a simple prompt command: | npx tsx src/index.ts 'can you multiply 3 and 4' results: - text: you should see output from the baml logs showing the reasoning steps - text: | #### optional challenge add a field to your tool output format that includes the reasoning steps in the output! - name: context-window title: "Chapter 7 - Customize Your Context Window" text: | In this section, we'll explore how to customize the context window of the agent. this is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md) steps: - text: | update the agent to pretty-print the Context window for the model file: {src: ./walkthrough/07-agent.ts, dest: src/agent.ts} - text: "Test the formatting" command: | BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' - text: | next, let's update the agent to use XML formatting instead this is a very popular format for passing data to a model, among other things, because of the token efficiency of XML. 
file: {src: ./walkthrough/07b-agent.ts, dest: src/agent.ts} - text: | let's try it out command: | BAML_LOG=info npx tsx src/index.ts 'can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result' - text: | lets update our tests to match the new output format file: {src: ./walkthrough/07c-agent.baml, dest: baml_src/agent.baml} - text: | check out the updated tests command: | npx baml-cli test - name: api-endpoints title: "Chapter 8 - Adding API Endpoints" text: "Add an Express server to expose the agent via HTTP." steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." command: | export BAML_LOG=off - text: "Install Express and types" command: | npm install express && npm install --save-dev @types/express supertest incremental: true - text: "Add the server implementation" file: {src: ./walkthrough/08-server.ts, dest: src/server.ts} - text: "Start the server" command: | npx tsx src/server.ts - text: "Test with curl (in another terminal)" command: | curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you add 3 and 4"}' results: - text: | You should get an answer from the agent which includes the agentic trace, ending in a message like: code: | {"intent":"done_for_now","message":"The sum of 3 and 4 is 7."} - name: state-management title: "Chapter 9 - In-Memory State and Async Clarification" text: "Add state management and async clarification support." steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." 
command: | export BAML_LOG=off - text: "Add some simple in-memory state management for threads" file: {src: ./walkthrough/09-state.ts, dest: src/state.ts} - text: | update the server to use the state management * Add thread state management using `ThreadStore` * return thread IDs and response URLs from the /thread endpoint * implement GET /thread/:id * implement POST /thread/:id/response file: {src: ./walkthrough/09-server.ts, dest: src/server.ts} - text: "Start the server" command: | npx tsx src/server.ts - text: "Test clarification flow" command: | curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you multiply 3 and xyz"}' - name: human-approval title: "Chapter 10 - Adding Human Approval" text: "Add support for human approval of operations." steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." command: | export BAML_LOG=off - text: | update the server to handle human approvals * Import `handleNextStep` to execute approved actions * Add two payload types to distinguish approvals from responses * Handle responses and approvals differently in the endpoint * Show better error messages when things go wrong file: {src: ./walkthrough/10-server.ts, dest: src/server.ts} - text: "Add a few methods to the agent to handle approvals and responses" file: {src: ./walkthrough/10-agent.ts, dest: src/agent.ts} - text: "Start the server" command: | npx tsx src/server.ts - text: "Test division with approval" command: | curl -X POST http://localhost:3000/thread \ -H "Content-Type: application/json" \ -d '{"message":"can you divide 3 by 4"}' results: - text: "You should see:" code: | { "thread_id": "2b243b66-215a-4f37-8bc6-9ace3849043b", "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } } ] } -
text: "reject the request with another curl call, changing the thread ID" command: | curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": false, "comment": "I dont think thats right, use 5 instead of 4"}' results: - text: 'You should see: the last tool call is now `"intent":"divide","a":3,"b":5`' code: | { "events": [ { "type": "user_input", "data": "can you divide 3 by 4" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 4, "response_url": "/thread/2b243b66-215a-4f37-8bc6-9ace3849043b/response" } }, { "type": "tool_response", "data": "user denied the operation with feedback: \"I dont think thats right, use 5 instead of 4\"" }, { "type": "tool_call", "data": { "intent": "divide", "a": 3, "b": 5, "response_url": "/thread/1f1f5ff5-20d7-4114-97b4-3fc52d5e0816/response" } } ] } - text: "now you can approve the operation" command: | curl -X POST 'http://localhost:3000/thread/{thread_id}/response' \ -H "Content-Type: application/json" \ -d '{"type": "approval", "approved": true}' results: - text: "you should see the final message includes the tool response and final result!" code: | ... { "type": "tool_response", "data": 0.6 }, { "type": "done_for_now", "message": "I divided 3 by 5 and the result is 0.6. If you have any more operations or queries, feel free to ask!", "response_url": "/thread/2b469403-c497-4797-b253-043aae830209/response" } - name: humanlayer-approval title: "Chapter 11 - Human Approvals over email" text: | in this section, we'll add support for human approvals over email.
This will start a little bit contrived, just to get the concepts down - We'll start by invoking the workflow from the CLI but approvals for `divide` and `request_more_information` will be handled over email, then the final `done_for_now` answer will be printed back to the CLI While contrived, this is a great example of the flexibility you get from [factor 7 - contact humans with tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-7-contact-humans-with-tools.md) steps: - text: "for this section, we'll disable the baml logs. You can optionally enable them if you want to see more details." command: | export BAML_LOG=off - text: "Install HumanLayer" command: | npm install humanlayer incremental: true - text: "Update CLI to send `divide` and `request_more_information` to a human via email" file: {src: ./walkthrough/11-cli.ts, dest: src/cli.ts} - text: "Run the CLI" command: | npx tsx src/index.ts 'can you divide 4 by 5' results: - text: "The last line of your program should mention human review step" code: | nextStep { intent: 'divide', a: 4, b: 5 } HumanLayer: Requested human approval from HumanLayer cloud - text: | go ahead and respond to the email with some feedback: ![reject-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-reject.png?raw=true) - text: | you should get another email with an updated attempt based on your feedback! You can go ahead and approve this one: ![approve-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-approve.png?raw=true) results: - text: and your final output will look like code: | nextStep { intent: 'done_for_now', message: 'The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask!' } The division of 4 by 5 is 0.8. If you have any other calculations or questions, feel free to ask! 
- text: | let's implement the `request_more_information` flow as well file: {src: ./walkthrough/11b-cli.ts, dest: src/cli.ts} - text: | let's test the `request_more_information` flow by asking for a calculation with garbled input: command: | npx tsx src/index.ts 'can you multiply 4 and xyz' - text: "You should get an email with a request for clarification" command: | Can you clarify what 'xyz' represents in this context? Is it a specific number, variable, or something else? - text: you can respond with something like command: | use 8 instead of xyz results: - text: you should see a final result on the CLI like code: | I have multiplied 4 and xyz, using the value 8 for xyz, resulting in 32. - text: | as a final step, let's explore using a custom html template for the email file: {src: ./walkthrough/11c-cli.ts, dest: src/cli.ts} - text: | first try with divide: command: | npx tsx src/index.ts 'can you divide 4 by 5' results: - text: | you should see a slightly different email with the custom template ![custom-template-email](https://github.com/humanlayer/12-factor-agents/blob/main/workshops/2025-05/walkthrough/11-email-custom.png?raw=true) feel free to run with the flow and then you can try updating the template to your liking (if you're using cursor, something as simple as highlighting the template and asking to "make it better" should do the trick) try triggering "request_more_information" as well! - text: | that's it - in the next chapter, we'll build a fully email-driven workflow agent that uses webhooks for human approval - name: humanlayer-webhook title: "Chapter XX - HumanLayer Webhook Integration" text: | the previous sections used the humanlayer SDK in "synchronous mode" - that means every time we wait for human approval, we sit in a loop polling until the human response is received.
That's obviously not ideal, especially for production workloads, so in this section we'll implement [factor 6 - launch / pause / resume with simple APIs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-6-launch-pause-resume.md) by updating the server to end processing after contacting a human, and use webhooks to receive the results. steps: - text: | add code to initialize humanlayer in the server file: {src: ./walkthrough/12-1-server-init.ts, dest: src/server.ts} - text: | next, lets update the /thread endpoint to 1. handle requests asynchronously, returning immediately 2. create a human contact on request_more_information and done_for_now calls # file: {src: } - text: | Update the server to be able to handle request_clarification responses - remove the old /response endpoint and types - update the /thread endpoint to run processing asynchronously, return immediately - send a state.threadId when requesting human responses - add a handleHumanResponse function to process the human response - add a /webhook endpoint to handle the webhook response file: {src: ./walkthrough/12a-server.ts, dest: src/server.ts} - text: "Start the server in another terminal" command: | npx tsx src/server.ts - text: | now that the server is running, send a payload to the '/thread' endpoint - text: __ do the response step - text: __ now handle approvals for divide - text: __ now also handle done_for_now ================================================ FILE: workshops/2025-07-16/.gitignore ================================================ baml_src/*.baml src/*.ts package.json package-lock.json tsconfig.json build/ tmp/ ================================================ FILE: workshops/2025-07-16/CLAUDE.md ================================================ # Workshop 2025-07-16: Python/Jupyter Notebook Implementation • **Main Tool**: `walkthroughgen_py.py` - Converts TypeScript walkthrough to Jupyter notebooks • **Config**: `walkthrough.yaml` - Defines notebook structure 
and content • **Output**: `workshop_final.ipynb` - Generated notebook with Chapters 0-7 • **Testing**: `test_notebook_colab_sim.sh` - Simulates Google Colab environment ## Key Implementation Learnings • **No async/await in notebooks** - All BAML calls must be synchronous, remove all async patterns • **No sys.argv** - Main functions accept parameters directly: `main("hello")` not command line args • **Global namespace** - Functions defined in cells persist globally, no module imports between cells • **BAML setup is optional** - Use `baml_setup: true` step only when introducing BAML (Chapter 1+) • **get_baml_client() pattern** - Required workaround for Google Colab import cache issues • **BAML files from GitHub** - Fetch with curl since Colab can't display local BAML files • **Regenerate BAML** - Use `regenerate_baml: true` in run_main when BAML files change • **Import removal** - Remove `from baml_client import get_baml_client` imports from Python files • **IN_COLAB detection** - Use try/except on google.colab import to detect environment • **Human input handling** - get_human_input() uses real input() in Colab, auto-responses locally ## Implementation Patterns • **walkthroughgen_py.py enhancements** - Added kwargs support for run_main steps • **Test simulation** - test_notebook_colab_sim.sh creates clean venv with all dependencies • **Debug artifacts** - Test runs preserved in ./tmp/test_TIMESTAMP/ directories • **BAML test support** - baml-cli test works fine in notebooks, contrary to initial assumption • **Tool execution** - All calculator operations (add/subtract/multiply/divide) in agent loop • **Clarification flow** - ClarificationRequest tool for handling ambiguous inputs • **Serialization formats** - JSON vs XML for thread history (XML more token-efficient) • **Progressive complexity** - Start with hello world, gradually add BAML, tools, loops, tests ## Chapter Implementation Status • **Chapter 0**: Hello World - Simple Python program, no BAML ✅ • **Chapter 
1**: CLI and Agent - BAML introduction, basic agent ✅ • **Chapter 2**: Calculator Tools - Tool definitions without execution ✅ • **Chapter 3**: Tool Loop - Full agent loop with tool execution ✅ • **Chapter 4**: BAML Tests - Test cases with assertions ✅ • **Chapter 5**: Human Tools - Clarification requests with input handling ✅ • **Chapter 6**: Improved Prompting - Reasoning steps in prompts ✅ • **Chapter 7**: Context Serialization - JSON/XML thread formats ✅ • **Chapters 8-12**: Skipped - Server-based features not suitable for notebooks ⚠️ ## Common Pitfalls Avoided • **Import errors** - baml_client imports fail in notebooks, use global get_baml_client • **Async patterns** - Notebooks can't handle async/await, everything must be sync • **File paths** - Use absolute paths from notebook directory, handle ./ prefixes • **BAML file conflicts** - Each chapter updates same files (agent.baml) not chapter-specific • **Tool registration** - Ensure all tool types handled in agent loop switch statement • **Test expectations** - BAML tests may have varying outputs, assertions verify key properties • **Environment differences** - Code must work in both Colab and local testing environments ## Testing Commands • Generate notebook: `uv run python walkthroughgen_py.py walkthrough.yaml -o test.ipynb` • Full Colab sim: `./test_notebook_colab_sim.sh` • Run BAML tests: `baml-cli test` (from directory with baml_src) ## File Structure • `walkthrough/*.py` - Python implementations of each chapter's code • `walkthrough/*.baml` - BAML files fetched from GitHub during notebook execution • `walkthroughgen_py.py` - Main conversion tool • `walkthrough.yaml` - Notebook definition with all chapters • `test_notebook_colab_sim.sh` - Full Colab environment simulation • `workshop_final.ipynb` - Final generated notebook ready for workshop ================================================ FILE: workshops/2025-07-16/hack/analyze_log_capture.py ================================================ 
#!/usr/bin/env python3
"""
Analyze notebook for BAML log capture success/failure
"""

import json
import sys
import os


def check_logs(notebook_path):
    """Check if BAML logs were captured in the notebook.

    Looks at every code cell whose source mentions ``run_with_baml_logs`` and
    scans that cell's ``stream`` outputs for the BAML "Parsed Response" marker.
    Progress messages are printed as matches are found.

    Args:
        notebook_path: Path to an executed ``.ipynb`` file.

    Returns:
        A ``(found_capture_test, found_log_pattern)`` tuple of booleans.
        Both are ``False`` when ``notebook_path`` does not exist.

    Raises:
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the document has no ``cells`` list or a cell has no
            ``cell_type``.
    """
    if not os.path.exists(notebook_path):
        print(f"❌ Notebook not found: {notebook_path}")
        return False, False

    # Notebook files are UTF-8 JSON and the captured output contains emoji;
    # relying on the platform default encoding breaks on non-UTF-8 locales.
    with open(notebook_path, encoding='utf-8') as f:
        nb = json.load(f)

    found_log_pattern = False
    found_capture_test = False

    for i, cell in enumerate(nb['cells']):
        if cell['cell_type'] != 'code' or 'outputs' not in cell:
            continue

        # Check if this is a log capture test cell
        source = ''.join(cell.get('source', []))
        if 'run_with_baml_logs' not in source:
            continue

        found_capture_test = True
        print(f'Found log capture test in cell {i}')

        # Check outputs for BAML logs
        for output in cell['outputs']:
            if output.get('output_type') != 'stream' or 'text' not in output:
                continue

            # 'text' may be a single string or a list of lines; join handles both.
            text = ''.join(output['text'])

            # Look for the specific BAML log pattern
            if '---Parsed Response (class DoneForNow)---' in text:
                found_log_pattern = True
                print(f'✅ FOUND BAML LOG PATTERN in cell {i} output!')
                log_lines = [line for line in text.split('\n') if 'Parsed Response' in line]
                if log_lines:
                    print(f'Log excerpt: {log_lines[0]}')

            # Also check for our test markers
            if 'Captured BAML Logs' in text:
                print(f'Found "Captured BAML Logs" section in cell {i}')
            if 'No BAML Logs Captured' in text:
                print(f'Found "No BAML Logs Captured" section in cell {i}')

    return found_capture_test, found_log_pattern


def main():
    """Command-line entry point.

    Expects exactly one argument, the notebook path. Exits with status 0 when
    the BAML log pattern was captured, and 1 on bad usage, when no capture
    test cell exists, or when the pattern is missing.
    """
    if len(sys.argv) != 2:
        # Name the expected argument so the usage line is actionable.
        print("Usage: python analyze_log_capture.py NOTEBOOK_PATH")
        sys.exit(1)

    notebook_path = sys.argv[1]
    capture_test_found, log_pattern_found = check_logs(notebook_path)

    if not capture_test_found:
        print('❌ FAIL: No log capture test found in notebook')
        sys.exit(1)

    if log_pattern_found:
        print('✅ PASS: BAML logs successfully captured in notebook output!')
        sys.exit(0)
    else:
        print('❌ FAIL: BAML log pattern not found in captured output')
        print('This means the log capture method is NOT working')
        sys.exit(1)


if __name__ == '__main__':
    main()
================================================ FILE: workshops/2025-07-16/hack/inspect_notebook.py ================================================ #!/usr/bin/env python3 """ Utility to inspect notebook cell outputs for debugging """ import json import sys import os def inspect_notebook(notebook_path, filter_keyword=None): """Inspect notebook cells and outputs""" if not os.path.exists(notebook_path): print(f"❌ Notebook not found: {notebook_path}") return with open(notebook_path) as f: nb = json.load(f) print(f"📓 Inspecting notebook: {notebook_path}") print(f"📊 Total cells: {len(nb['cells'])}") print("=" * 60) for i, cell in enumerate(nb['cells']): if cell['cell_type'] == 'code': source = ''.join(cell.get('source', [])) # Filter by keyword if provided if filter_keyword and filter_keyword.lower() not in source.lower(): continue print(f"\n🔍 CELL {i} ({'code'})") print("📝 SOURCE:") print(source[:300] + "..." if len(source) > 300 else source) if 'outputs' in cell and cell['outputs']: print(f"\n📤 OUTPUTS ({len(cell['outputs'])} outputs):") for j, output in enumerate(cell['outputs']): output_type = output.get('output_type', 'unknown') print(f" Output {j}: type={output_type}") if 'text' in output: text = ''.join(output['text']) print(f" Text length: {len(text)} chars") # Show first few lines for context lines = text.split('\n')[:5] for line in lines: if line.strip(): print(f" > {line[:80]}...") # Check for interesting patterns patterns = ['BAML', 'Parsed', 'Response', 'Error', 'Exception'] found_patterns = [p for p in patterns if p in text] if found_patterns: print(f" 🎯 Found patterns: {found_patterns}") elif 'data' in output: data_keys = list(output['data'].keys()) print(f" Data keys: {data_keys}") # Check for execution errors if output_type == 'error': print(f" ❌ ERROR: {output.get('ename', 'Unknown')}") print(f" 💬 Message: {output.get('evalue', 'No message')}") if 'traceback' in output: print(f" 📍 Traceback: {len(output['traceback'])} lines") # Show last few lines of 
traceback for line in output['traceback'][-3:]: print(f" 🔍 {line.strip()}") else: print("\n📤 No outputs") print("-" * 40) def main(): if len(sys.argv) < 2: print("Usage: python inspect_notebook.py [filter_keyword]") sys.exit(1) notebook_path = sys.argv[1] filter_keyword = sys.argv[2] if len(sys.argv) > 2 else None inspect_notebook(notebook_path, filter_keyword) if __name__ == '__main__': main() ================================================ FILE: workshops/2025-07-16/hack/minimal_test.ipynb ================================================ { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import sys\n", "print(\"Hello stdout!\")\n", "print(\"Hello stderr!\", file=sys.stderr)\n", "with open(\"test_output.txt\", \"w\") as f:\n", " f.write(\"Notebook executed successfully!\\n\")\n", "print(\"✅ Test complete\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.8.0" } }, "nbformat": 4, "nbformat_minor": 4 } ================================================ FILE: workshops/2025-07-16/hack/test_log_capture.sh ================================================ #!/bin/bash set -e echo "🧪 Testing BAML Log Capture..." # Clean up any previous test rm -f test_capture.ipynb rm -rf tmp/test_capture_* # Generate test notebook echo "📝 Generating test notebook..." uv run python walkthroughgen_py.py simple_log_test.yaml -o test_capture.ipynb # Run in sim echo "🚀 Running test in sim..." ./test_notebook_colab_sim.sh test_capture.ipynb > /dev/null 2>&1 # Find the executed notebook in the timestamped directory NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1) NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb" echo "📋 Analyzing results from $NOTEBOOK_PATH..." # First dump debug info echo "🔍 Dumping debug info..." python3 inspect_notebook.py "$NOTEBOOK_PATH" "run_with_baml_logs" echo "" echo "📊 Running log capture analysis..." 
# Check for BAML log patterns in the executed notebook python3 analyze_log_capture.py "$NOTEBOOK_PATH" echo "🧹 Cleaning up..." rm -f test_capture.ipynb ================================================ FILE: workshops/2025-07-16/hack/testing.md ================================================ # Jupyter Notebook Testing Framework This document describes the general testing framework for validating any functionality in Jupyter notebooks, with a specific example of testing BAML log capture. ## General Framework ### Overview The testing framework provides a complete iteration loop for testing notebook implementations: 1. **Generate** test notebooks with specific functionality 2. **Execute** notebooks in a simulated Google Colab environment 3. **Analyze** executed notebooks for expected outputs and behaviors 4. **Report** clear pass/fail results ### Core Components #### Notebook Simulator (`test_notebook_colab_sim.sh`) The simulation script creates a realistic Google Colab environment for any notebook: **Environment Setup:** - Creates timestamped test directory: `./tmp/test_YYYYMMDD_HHMMSS/` - Sets up fresh Python virtual environment - Installs Jupyter dependencies (`notebook`, `nbconvert`, `ipykernel`) **Notebook Execution:** - Copies test notebook to clean environment - Uses `ExecutePreprocessor` to run all cells (simulates Colab execution) - **Critical:** Activates virtual environment before execution - **Critical:** Saves executed notebook with cell outputs back to disk **Usage:** ```bash ./test_notebook_colab_sim.sh your_notebook.ipynb ``` The simulator will: - Execute all cells in the notebook - Preserve the test directory for inspection - Show final directory structure - Report success/failure #### Output Inspector (`inspect_notebook.py`) Debug utility for examining notebook cell outputs in detail: **Features:** - Shows cell source code and execution counts - Displays all output types (stream, execute_result, error) - Highlights patterns in output text - Shows 
execution errors with tracebacks - Filters cells by keywords for focused debugging **Usage:** ```bash # Inspect all cells python3 inspect_notebook.py path/to/notebook.ipynb # Filter for specific content python3 inspect_notebook.py path/to/notebook.ipynb "keyword" # Look for errors python3 inspect_notebook.py path/to/notebook.ipynb "error" ``` **Sample Output:** ``` 🔍 CELL 0 (code) 📝 SOURCE: import sys print("Hello!") print("Error!", file=sys.stderr) 📤 OUTPUTS (2 outputs): Output 0: type=stream Text length: 7 chars > Hello!... Output 1: type=stream Text length: 7 chars > Error!... 🎯 Found patterns: ['Error'] ``` ### Key Insights for Notebook Testing #### Execution Environment 1. **Virtual environment activation is critical** - Without it, execution fails silently 2. **Output persistence must be explicit** - `ExecutePreprocessor` only modifies notebook in memory 3. **Check execution counts** - `execution_count=None` means cell never executed 4. **Handle different output types** - stream, execute_result, error, display_data #### Common Debugging Steps 1. **Verify basic execution:** ```bash python3 -c " import json nb = json.load(open('path/to/notebook.ipynb')) print('Execution counts:', [cell.get('execution_count') for cell in nb['cells'] if cell['cell_type']=='code']) " ``` 2. **Check for execution errors:** ```bash python3 inspect_notebook.py path/to/notebook.ipynb "error" ``` 3. **Look for specific output patterns:** ```bash python3 inspect_notebook.py path/to/notebook.ipynb "your_pattern" ``` ### Creating Custom Tests #### 1. 
Minimal Test Template Create a simple notebook that tests basic functionality: ```json { "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Test basic execution\n", "print('Hello from notebook!')\n", "\n", "# Test file creation\n", "with open('test.txt', 'w') as f:\n", " f.write('Test successful\\n')\n", "\n", "# Test error handling\n", "try:\n", " result = your_function_to_test()\n", " print(f'Result: {result}')\n", "except Exception as e:\n", " print(f'Error: {e}')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 4 } ``` #### 2. Test Script Template ```bash #!/bin/bash set -e echo "🧪 Testing [Your Feature]..." # Clean up any previous test rm -f test_notebook.ipynb # Generate or copy your test notebook cp your_test_notebook.ipynb test_notebook.ipynb # Run in simulator echo "🚀 Running test in sim..." ./test_notebook_colab_sim.sh test_notebook.ipynb # Find the executed notebook NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1) NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb" # Analyze results echo "📋 Analyzing results..." python3 inspect_notebook.py "$NOTEBOOK_PATH" "your_search_term" # Add your custom analysis python3 -c " import json with open('$NOTEBOOK_PATH') as f: nb = json.load(f) # Your custom analysis logic here success = check_for_expected_outputs(nb) if success: print('✅ PASS: Test succeeded!') else: print('❌ FAIL: Test failed!') exit(1) " echo "🧹 Cleaning up..." rm -f test_notebook.ipynb ``` --- ## Use Case: BAML Log Capture Testing This section demonstrates how to use the general framework for a specific use case: testing BAML log capture in notebooks. ### Problem Statement BAML (a language model framework) uses FFI bindings to a Rust binary and outputs logs to stderr. We need to test whether different log capture methods can successfully capture these logs in Jupyter notebook cells. 
### Test Implementation #### Test Configuration (`simple_log_test.yaml`) ```yaml title: "BAML Log Capture Test" text: "Simple test for log capture" sections: - title: "Log Capture Test" steps: - baml_setup: true - fetch_file: src: "walkthrough/01-agent.baml" dest: "baml_src/agent.baml" - file: src: "./simple_main.py" - text: "Testing log capture with show_logs=true:" - run_main: args: "What is 2+2?" show_logs: true ``` #### Test Function (`simple_main.py`) ```python def main(message="What is 2+2?"): """Simple main function that calls BAML directly""" client = get_baml_client() # Call the BAML function - this should generate logs result = client.DetermineNextStep(f"User asked: {message}") print(f"Input: {message}") print(f"Result: {result}") return result ``` #### Log Capture Implementation The current working implementation in `walkthroughgen_py.py`: ```python def run_with_baml_logs(func, *args, **kwargs): """Test log capture using IPython capture_output""" # Ensure BAML_LOG is set if 'BAML_LOG' not in os.environ: os.environ['BAML_LOG'] = 'info' print(f"[LOG CAPTURE TEST] Running with BAML_LOG={os.environ.get('BAML_LOG')}...") # Capture both stdout and stderr with capture_output() as captured: result = func(*args, **kwargs) # Display captured outputs if captured.stdout: print("=== Captured Stdout ===") print(captured.stdout) if captured.stderr: print("=== Captured BAML Logs ===") print(captured.stderr) else: print("=== No BAML Logs Captured ===") print("=== Function Result ===") print(result) return result ``` ### Test Execution #### Main Test Script (`test_log_capture.sh`) ```bash #!/bin/bash set -e echo "🧪 Testing BAML Log Capture..." # Generate test notebook from YAML config echo "📝 Generating test notebook..." uv run python walkthroughgen_py.py simple_log_test.yaml -o test_capture.ipynb # Run in simulator echo "🚀 Running test in sim..." 
./test_notebook_colab_sim.sh test_capture.ipynb # Find the executed notebook NOTEBOOK_DIR=$(ls -1dt tmp/test_* | head -1) NOTEBOOK_PATH="$NOTEBOOK_DIR/test_notebook.ipynb" echo "📋 Analyzing results from $NOTEBOOK_PATH..." # Debug output echo "🔍 Dumping debug info..." python3 inspect_notebook.py "$NOTEBOOK_PATH" "run_with_baml_logs" # Analyze for BAML log patterns echo "📊 Running log capture analysis..." python3 analyze_log_capture.py "$NOTEBOOK_PATH" echo "🧹 Cleaning up..." rm -f test_capture.ipynb ``` #### Analysis Script (`analyze_log_capture.py`) ```python #!/usr/bin/env python3 import json import sys import os def check_logs(notebook_path): """Check if BAML logs were captured in the notebook""" with open(notebook_path) as f: nb = json.load(f) found_log_pattern = False found_capture_test = False for i, cell in enumerate(nb['cells']): if cell['cell_type'] == 'code' and 'outputs' in cell: source = ''.join(cell.get('source', [])) if 'run_with_baml_logs' in source: found_capture_test = True print(f'Found log capture test in cell {i}') # Check outputs for BAML logs for output in cell['outputs']: if output.get('output_type') == 'stream' and 'text' in output: text = ''.join(output['text']) # Look for the specific BAML log pattern if '---Parsed Response (class DoneForNow)---' in text: found_log_pattern = True print(f'✅ FOUND BAML LOG PATTERN in cell {i} output!') return found_capture_test, found_log_pattern # Run analysis and return pass/fail capture_test_found, log_pattern_found = check_logs(sys.argv[1]) if not capture_test_found: print('❌ FAIL: No log capture test found in notebook') sys.exit(1) if log_pattern_found: print('✅ PASS: BAML logs successfully captured in notebook output!') sys.exit(0) else: print('❌ FAIL: BAML log pattern not found in captured output') sys.exit(1) ``` ### Expected Output Flow #### Successful Test Run: ```bash $ ./test_log_capture.sh 🧪 Testing BAML Log Capture... 📝 Generating test notebook... 
Generated notebook: test_capture.ipynb 🚀 Running test in sim... 🧪 Creating clean test environment in: ./tmp/test_20250716_191106 📁 Test directory will be preserved for inspection 🐍 Creating fresh Python virtual environment... 📦 Installing Jupyter dependencies... 🏃 Running notebook in clean environment... ✅ Notebook executed successfully! 💾 Executed notebook saved with outputs 📋 Analyzing results from tmp/test_20250716_191106/test_notebook.ipynb... 🔍 Dumping debug info... Found log capture test in cell 11 📤 OUTPUTS (3 outputs): Output 0: type=stream Text length: 49 chars > [LOG CAPTURE TEST] Running with BAML_LOG=info...... Output 1: type=stream Text length: 1272 chars > 2025-07-16T19:11:22.445 [BAML [92mINFO[0m] [35mFunction DetermineNextStep[0m... 🎯 Found patterns: ['BAML', 'Parsed', 'Response'] 📊 Running log capture analysis... Found log capture test in cell 11 ✅ FOUND BAML LOG PATTERN in cell 11 output! ✅ PASS: BAML logs successfully captured in notebook output! 🧹 Cleaning up... ``` ### Key BAML-Specific Insights 1. **BAML logs go to stderr** - Due to FFI bindings to Rust binary 2. **Requires `BAML_LOG=info`** - Environment variable controls verbosity 3. **Logs include ANSI color codes** - Need to handle terminal formatting 4. **Pattern matching** - Look for `---Parsed Response (class DoneForNow)---` to confirm successful execution 5. **IPython capture_output() works** - Successfully captures stderr in notebook context ### Iteration Loop Benefits This framework enables rapid testing of different log capture approaches: 1. **Modify** the `run_with_baml_logs` function in `walkthroughgen_py.py` 2. **Run** `./test_log_capture.sh` 3. **Get** immediate pass/fail feedback 4. **Debug** with `inspect_notebook.py` if needed 5. **Repeat** until working implementation found This same pattern can be applied to test any notebook functionality: library integrations, environment setup, output formatting, error handling, etc. 
================================================ FILE: workshops/2025-07-16/pyproject.toml ================================================ [project] name = "workshops" version = "0.1.0" description = "Add your description here" readme = "README.md" requires-python = ">=3.11" dependencies = [ "baml>=0.19.1", "jupyter>=1.1.1", "nbformat>=5.10.4", "pyyaml>=6.0.2", ] ================================================ FILE: workshops/2025-07-16/test_notebook_colab_sim.sh ================================================ #!/bin/bash # Simulate Google Colab environment for testing notebooks set -e NOTEBOOK_PATH="$1" if [ -z "$NOTEBOOK_PATH" ]; then echo "Usage: $0 " exit 1 fi # Get absolute path of notebook NOTEBOOK_PATH=$(realpath "$NOTEBOOK_PATH") # Create test directory in current folder TIMESTAMP=$(date +%Y%m%d_%H%M%S) TEMP_DIR="./tmp/test_${TIMESTAMP}" mkdir -p "$TEMP_DIR" echo "🧪 Creating clean test environment in: $TEMP_DIR" # Don't auto-cleanup so we can inspect it echo "📁 Test directory will be preserved for inspection" # Change to temp directory cd "$TEMP_DIR" # Create fresh Python virtual environment echo "🐍 Creating fresh Python virtual environment..." python3 -m venv venv source venv/bin/activate # Install jupyter dependencies echo "📦 Installing Jupyter dependencies..." 
pip install --quiet notebook nbconvert ipykernel # Copy notebook to temp directory cp "$NOTEBOOK_PATH" test_notebook.ipynb # Create a Python script to execute the notebook cat > run_notebook.py << 'EOF' import nbformat from nbconvert.preprocessors import ExecutePreprocessor import sys import os # Ensure OPENAI_API_KEY is passed through if 'OPENAI_API_KEY' in os.environ: print(f"✅ OPENAI_API_KEY is set") else: print("⚠️ Warning: OPENAI_API_KEY not set") # Read notebook with open('test_notebook.ipynb', 'r') as f: nb = nbformat.read(f, as_version=4) # Execute ALL cells (just like Colab) ep = ExecutePreprocessor(timeout=120, kernel_name='python3') print("🚀 Executing notebook (this simulates Google Colab)...") print("=" * 60) try: ep.preprocess(nb, {'metadata': {'path': '.'}}) print("\n✅ Notebook executed successfully!") # Save the executed notebook back to disk with open('test_notebook.ipynb', 'w') as f: nbformat.write(nb, f) print("💾 Executed notebook saved with outputs") # Show final directory structure print("\n📁 Final directory structure:") for root, dirs, files in os.walk('.'): level = root.replace('.', '').count(os.sep) indent = ' ' * 2 * level print(f"{indent}{os.path.basename(root)}/") subindent = ' ' * 2 * (level + 1) for file in files[:10]: # Limit output if not file.startswith('.'): print(f"{subindent}{file}") except Exception as e: print(f"\n❌ Error executing notebook: {e}") if hasattr(e, 'traceback'): print("\nTraceback:") print(e.traceback) sys.exit(1) EOF # Run the notebook echo "🏃 Running notebook in clean environment..." 
source venv/bin/activate && python run_notebook.py # Check what BAML files were created echo -e "\n📄 BAML files created:" if [ -d "baml_src" ]; then ls -la baml_src/ else echo "No baml_src directory found" fi # Check if Python BAML client was generated echo -e "\n🐍 Python BAML client:" if [ -d "baml_client" ]; then # Check if it's Python or TypeScript if [ -f "baml_client/__init__.py" ]; then echo "✅ Python client generated" ls baml_client/*.py 2>/dev/null | head -5 else echo "❌ TypeScript client generated (not Python)" ls baml_client/*.ts 2>/dev/null | head -5 fi else echo "No baml_client directory found" fi echo -e "\n✨ Test complete!" ================================================ FILE: workshops/2025-07-16/walkthrough/00-.gitignore ================================================ baml_client/ node_modules/ ================================================ FILE: workshops/2025-07-16/walkthrough/00-main.py ================================================ def hello(): print('hello, world!') def main(): hello() ================================================ FILE: workshops/2025-07-16/walkthrough/00-package.json ================================================ { "name": "my-agent", "version": "0.1.0", "private": true, "scripts": { "dev": "tsx src/index.ts", "build": "tsc" }, "dependencies": { "tsx": "^4.15.0", "typescript": "^5.0.0" }, "devDependencies": { "@types/node": "^20.0.0", "@typescript-eslint/eslint-plugin": "^6.0.0", "@typescript-eslint/parser": "^6.0.0", "eslint": "^8.0.0" } } ================================================ FILE: workshops/2025-07-16/walkthrough/00-tsconfig.json ================================================ { "compilerOptions": { "target": "ES2017", "lib": ["esnext"], "allowJs": true, "skipLibCheck": true, "strict": true, "noEmit": true, "esModuleInterop": true, "module": "esnext", "moduleResolution": "bundler", "resolveJsonModule": true, "isolatedModules": true, "jsx": "preserve", "incremental": true, "plugins": [], "paths": { 
// Asks the model what to do next for a serialized thread.
// Input:  `thread` - the conversation so far, already rendered to a string.
// Output: a DoneForNow value (the only return type declared at this step).
function DetermineNextStep(
    thread: string
) -> DoneForNow {
    client "openai/gpt-4o"

    // use /nothink for now because the thinking tokens (or streaming thereof) screw with baml (i think (no pun intended))
    // NOTE(review): the prompt below contains no "/nothink" directive and the
    // client above is openai/gpt-4o, so the remark above does not describe this
    // function as written - presumably left over from a different model; confirm
    // or remove.
    prompt #"
        {{ _.role("system") }}

        You are a helpful assistant that can help with tasks.

        {{ _.role("user") }}

        You are working on the following thread:

        {{ thread }}

        What should the next step be?

        {{ ctx.output_format }}
    "#
}
def agent_loop(thread: Thread) -> AgentResponse:
    """Run exactly one LLM turn for ``thread`` and hand back the raw result.

    The thread is serialized with ``thread.serialize_for_llm()`` and passed to
    the BAML client's ``DetermineNextStep``; whatever that call returns is
    returned unchanged. No looping or tool handling happens at this step.
    """
    # get_baml_client is provided elsewhere in this notebook/module.
    client = get_baml_client()
    return client.DetermineNextStep(thread.serialize_for_llm())
def main(message="hello from the notebook!"):
    """Ask the agent for a single next step and print it as-is.

    Args:
        message: User text used as the only event of a fresh thread.

    The raw ``DetermineNextStep`` result is printed (not executed), so the
    selected tool call is visible in the output.
    """
    first_event = {"type": "user_input", "data": message}
    conversation = Thread([first_event])

    client = get_baml_client()
    print(client.DetermineNextStep(conversation.serialize_for_llm()))
def agent_loop(thread: Thread) -> str:
    """Query the model repeatedly until it answers with ``done_for_now``.

    Only the ``add`` tool is executed at this step: the call and its sum are
    appended to ``thread.events`` and the model is asked again.

    Returns:
        The ``message`` of the ``done_for_now`` step.

    Raises:
        ValueError: if the model selects any intent other than
            ``done_for_now`` or ``add``.
    """
    client = get_baml_client()

    while True:
        step = client.DetermineNextStep(thread.serialize_for_llm())
        print("nextStep", step)

        # Final answer for the human: stop looping.
        if step.intent == "done_for_now":
            return step.message

        # Anything that is not "add" is unsupported here.
        if step.intent != "add":
            raise ValueError(f"Unknown intent: {step.intent}")

        thread.events.append({"type": "tool_call", "data": step.__dict__})
        total = step.a + step.b
        print("tool_response", total)
        thread.events.append({"type": "tool_response", "data": total})
def handle_next_step(next_step, thread: "Thread") -> "Thread":
    """Execute one calculator tool call and record its result on the thread.

    Args:
        next_step: Object with an ``intent`` of ``"add"``, ``"subtract"``,
            ``"multiply"`` or ``"divide"`` and numeric operands ``a`` and ``b``.
        thread: Object whose ``events`` list receives a ``tool_response``
            event. It is mutated in place.

    Returns:
        The same ``thread`` object that was passed in.

    Raises:
        ValueError: if ``next_step.intent`` is not one of the four calculator
            intents (previously the function fell through and returned
            ``None``, which broke the caller on its next iteration).
    """
    # The annotations are quoted so defining this function does not require
    # the Thread class to be in scope yet.
    operations = {
        "add": lambda a, b: a + b,
        "subtract": lambda a, b: a - b,
        "multiply": lambda a, b: a * b,
        "divide": lambda a, b: a / b,
    }

    compute = operations.get(next_step.intent)
    if compute is None:
        raise ValueError(f"Unknown intent: {next_step.intent}")

    if next_step.intent == "divide" and next_step.b == 0:
        # Report the failure as the tool result instead of letting
        # ZeroDivisionError escape and end the agent loop.
        result = "Error: Division by zero"
    else:
        result = compute(next_step.a, next_step.b)

    print("tool_response", result)
    thread.events.append({
        "type": "tool_response",
        "data": result
    })
    return thread
/**
 * Executes a single calculator tool call and appends its numeric result to
 * the thread as a `tool_response` event.
 *
 * @param nextStep - the calculator tool selected by the model (`intent` plus operands `a` and `b`)
 * @param thread - the thread to record the result on; mutated in place
 * @returns the same thread instance that was passed in
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Not reachable while CalculatorTool stays a four-member union, but
            // failing loudly beats returning undefined to the agent loop.
            throw new Error(`Unknown intent: ${(nextStep as { intent: string }).intent}`);
    }

    // Logging and event recording are the same for every operation.
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
// Runs DetermineNextStep on a one-event thread asking to multiply 3 and 4.
// This version of the test declares no @@assert lines, so it states no
// expectation about the returned intent or operands.
test SimpleMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [{"type": "user_input", "data": "can you multiply 3 and 4"}]
    "#
  }
}
// Runs DetermineNextStep on a one-event thread asking to multiply 3 and 4
// and asserts on the parsed result.
test SimpleMath {
  functions [DetermineNextStep]
  args {
    thread #"
      [{"type": "user_input", "data": "can you multiply 3 and 4"}]
    "#
  }
  // The result must be the multiply tool...
  @@assert(intent_check, {{this.intent == "multiply"}})
  // ...with the operands in the order the user gave them.
  @@assert(a_check, {{this.a == 3}})
  @@assert(b_check, {{this.b == 4}})
}
// Feeds DetermineNextStep a thread in which all three requested operations
// (multiply, divide, add) already have a tool_call and a tool_response.
test CompleteConversation {
  functions [DetermineNextStep]
  args {
    thread #"
      [
        {"type": "user_input", "data": "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result"},
        {"type": "tool_call", "data": {"intent": "multiply", "a": 3, "b": 4}},
        {"type": "tool_response", "data": 12},
        {"type": "tool_call", "data": {"intent": "divide", "a": 12, "b": 2}},
        {"type": "tool_response", "data": 6},
        {"type": "tool_call", "data": {"intent": "add", "a": 6, "b": 12}},
        {"type": "tool_response", "data": 18}
      ]
    "#
  }
  // With nothing left to compute, the expected step is the final answer...
  @@assert(intent_check, {{this.intent == "done_for_now"}})
  // ...and its message must contain the last tool result, 18.
  @@assert(answer_check, {{"18" in this.message}})
}
def agent_loop(thread, clarification_handler, max_iterations=3):
    """Run the agent until it gives a final answer or the iteration budget ends.

    Each iteration serializes ``thread.events`` to JSON, asks the model for the
    next step, and acts on the returned ``intent``.

    Args:
        thread: Object with an ``events`` list; it is appended to in place.
        clarification_handler: Callable taking the model's question (str) and
            returning the human's reply.
        max_iterations: Maximum number of model calls (default 3).

    Returns:
        The model's final message on ``done_for_now``;
        ``"Error: Unexpected result type"`` when the result has no ``intent``
        or an intent this loop does not recognise; otherwise a message saying
        the iteration limit was reached.
    """
    # One table instead of four near-identical branches.
    calculator_ops = {
        'add': lambda a, b: a + b,
        'subtract': lambda a, b: a - b,
        'multiply': lambda a, b: a * b,
        'divide': lambda a, b: a / b,
    }

    for iteration_count in range(1, max_iterations + 1):
        print(f"🔄 Agent loop iteration {iteration_count}/{max_iterations}")

        # Get the client
        baml_client = get_baml_client()

        # Serialize the thread
        thread_json = json.dumps(thread.events, indent=2)

        # Call the agent
        result = baml_client.DetermineNextStep(thread_json)
        intent = getattr(result, 'intent', None)

        if intent == 'done_for_now':
            return result.message

        if intent == 'request_more_information':
            # Get clarification from the human
            clarification = clarification_handler(result.message)

            # Record both the question and the answer so the next model call
            # sees them.
            thread.events.append({
                "type": "clarification_request",
                "data": result.message
            })
            thread.events.append({
                "type": "clarification_response",
                "data": clarification
            })
            continue

        if intent in calculator_ops:
            operation = f"{intent}({result.a}, {result.b})"
            if intent == 'divide' and result.b == 0:
                # Report the failure to the model instead of raising.
                result_value = "Error: Division by zero"
            else:
                result_value = calculator_ops[intent](result.a, result.b)

            print(f"🔧 Calling tool: {operation} = {result_value}")

            # Add the tool call and result to the thread
            thread.events.append({
                "type": "tool_call",
                "data": {
                    "tool": "calculator",
                    "operation": operation,
                    "result": result_value
                }
            })
            continue

        # Missing or unrecognised intent: the thread did not change, so asking
        # the model again would only repeat the same call. Stop here.
        return "Error: Unexpected result type"

    # If we've reached max iterations without a final answer
    return f"Agent reached maximum iterations ({max_iterations}) without completing the task."
// Runs DetermineNextStep on a single user_input event (passed as one JSON
// object, not an array) asking to multiply 3 and 4.
test MathOperation {
  functions [DetermineNextStep]
  args {
    thread #"
      {
        "type": "user_input",
        "data": "can you multiply 3 and 4?"
      }
    "#
  }
  // Only the selected tool is asserted; the operands are not checked here.
  @@assert(intent, {{this.intent == "multiply"}})
}
// After a garbled request, a clarification question, and the human's reply
// ("lets try 12 instead"), the next step should be multiply(3, 12).
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    // The fixture is a valid JSON array (no trailing comma after the last event).
    thread #"
      [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // Each assertion is named after the field it checks, so a failure report
  // points at the right operand.
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
// After a garbled request, a clarification question, and the human's reply
// ("lets try 12 instead"), the next step should be multiply(3, 12).
test MathOperationPostClarification {
  functions [DetermineNextStep]
  args {
    // The fixture is a valid JSON array (no trailing comma after the last event).
    thread #"
      [
        {"type":"user_input","data":"can you multiply 3 and FD*(#F&& ?"},
        {"type":"tool_call","data":{"intent":"request_more_information","message":"It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply?"}},
        {"type":"human_response","data":"lets try 12 instead"}
      ]
    "#
  }
  @@assert(intent, {{this.intent == "multiply"}})
  // Each assertion is named after the field it checks, so a failure report
  // points at the right operand.
  @@assert(a, {{this.a == 3}})
  @@assert(b, {{this.b == 12}})
}
def agent_loop(thread, clarification_handler, use_xml=True):
    """Run the agent loop, serializing the thread as XML or JSON on each turn.

    Args:
        thread: Object providing ``serialize_as_xml()``, ``serialize_as_json()``
            and an ``events`` list; the list is appended to in place.
        clarification_handler: Callable taking the model's question (str) and
            returning the human's reply.
        use_xml: When true (default) the thread is sent via
            ``serialize_as_xml()``, otherwise via ``serialize_as_json()``.

    Returns:
        The model's final message on ``done_for_now``, or
        ``"Error: Unexpected result type"`` when the result has no ``intent``
        or an intent this loop does not recognise.
    """
    # One table instead of four near-identical branches.
    calculator_ops = {
        'add': lambda a, b: a + b,
        'subtract': lambda a, b: a - b,
        'multiply': lambda a, b: a * b,
        'divide': lambda a, b: a / b,
    }

    while True:
        # Get the client
        baml_client = get_baml_client()

        # Serialize the thread based on format preference
        if use_xml:
            thread_str = thread.serialize_as_xml()
            print(f"📄 Using XML serialization ({len(thread_str)} chars)")
        else:
            thread_str = thread.serialize_as_json()
            print(f"📄 Using JSON serialization ({len(thread_str)} chars)")

        # Call the agent
        result = baml_client.DetermineNextStep(thread_str)
        intent = getattr(result, 'intent', None)

        if intent == 'done_for_now':
            return result.message

        if intent == 'request_more_information':
            # Get clarification from the human
            clarification = clarification_handler(result.message)

            # Record both the question and the answer so the next model call
            # sees them.
            thread.events.append({
                "type": "clarification_request",
                "data": result.message
            })
            thread.events.append({
                "type": "clarification_response",
                "data": clarification
            })
            continue

        if intent in calculator_ops:
            operation = f"{intent}({result.a}, {result.b})"
            if intent == 'divide' and result.b == 0:
                # Report the failure to the model instead of raising.
                result_value = "Error: Division by zero"
            else:
                result_value = calculator_ops[intent](result.a, result.b)

            print(f"🔧 Calling tool: {operation} = {result_value}")

            # Add the tool call and result to the thread
            thread.events.append({
                "type": "tool_call",
                "data": {
                    "tool": "calculator",
                    "operation": operation,
                    "result": result_value
                }
            })
            continue

        # Missing or unrecognised intent: the thread did not change, so looping
        # again would repeat the same model call forever. Stop here.
        return "Error: Unexpected result type"
/**
 * Executes a single calculator tool call and appends its numeric result to
 * the thread as a `tool_response` event.
 *
 * @param nextStep - the calculator tool selected by the model (`intent` plus operands `a` and `b`)
 * @param thread - the thread to record the result on; mutated in place
 * @returns the same thread instance that was passed in
 * @throws Error if `nextStep.intent` is not one of the four calculator intents
 */
export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise<Thread> {
    let result: number;
    switch (nextStep.intent) {
        case "add":
            result = nextStep.a + nextStep.b;
            break;
        case "subtract":
            result = nextStep.a - nextStep.b;
            break;
        case "multiply":
            result = nextStep.a * nextStep.b;
            break;
        case "divide":
            result = nextStep.a / nextStep.b;
            break;
        default:
            // Not reachable while CalculatorTool stays a four-member union, but
            // failing loudly beats returning undefined to the agent loop.
            throw new Error(`Unknown intent: ${(nextStep as { intent: string }).intent}`);
    }

    // Logging and event recording are the same for every operation.
    console.log("tool_response", result);
    thread.events.push({
        "type": "tool_response",
        "data": result
    });
    return thread;
}
response to human, return the thread return thread; case "add": case "subtract": case "multiply": case "divide": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-07-16/walkthrough/07c-agent.baml ================================================ // human tools are async requests to a human type HumanTools = ClarificationRequest | DoneForNow class ClarificationRequest { intent "request_more_information" @description("you can request more information from me") message string } class DoneForNow { intent "done_for_now" message string @description(#" message to send to the user about the work that was done. "#) } function DetermineNextStep( thread: string ) -> HumanTools | CalculatorTools { client "openai/gpt-4o" prompt #" {{ _.role("system") }} You are a helpful assistant that can help with tasks. {{ _.role("user") }} You are working on the following thread: {{ thread }} What should the next step be? {{ ctx.output_format }} Always think about what to do next first, like: - ... - ... - ... {...} // schema "# } test HelloWorld { functions [DetermineNextStep] args { thread #" hello! "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperation { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4? "# } @@assert(intent, {{this.intent == "multiply"}}) } test LongMath { functions [DetermineNextStep] args { thread #" can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result? a: 3 b: 4 12 a: 12 b: 2 6 a: 6 b: 12 18 "# } @@assert(intent, {{this.intent == "done_for_now"}}) @@assert(answer, {{"18" in this.message}}) } test MathOperationWithClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and fe1iiaff10 "# } @@assert(intent, {{this.intent == "request_more_information"}}) } test MathOperationPostClarification { functions [DetermineNextStep] args { thread #" can you multiply 3 and FD*(#F&& ? 
message: It seems like there was a typo or mistake in your request. Could you please clarify or provide the correct numbers you would like to multiply? lets try 12 instead "# } @@assert(intent, {{this.intent == "multiply"}}) @@assert(b, {{this.a == 3}}) @@assert(a, {{this.b == 12}}) } ================================================ FILE: workshops/2025-07-16/walkthrough/08-server.ts ================================================ import express from 'express'; import { Thread, agentLoop } from '../src/agent'; const app = express(); app.use(express.json()); app.set('json spaces', 2); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const result = await agentLoop(thread); res.json(result); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { // optional - add state res.status(404).json({ error: "Not implemented yet" }); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-07-16/walkthrough/09-server.ts ================================================ import express from 'express'; import { Thread, agentLoop } from '../src/agent'; import { ThreadStore } from '../src/state'; const app = express(); app.use(express.json()); app.set('json spaces', 2); const store = new ThreadStore(); // POST /thread - Start new thread app.post('/thread', async (req, res) => { const thread = new Thread([{ type: "user_input", data: req.body.message }]); const threadId = store.create(thread); const newThread = await agentLoop(thread); store.update(threadId, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; // If we exited the loop, include the response URL so the client can // push a new message onto the thread lastEvent.data.response_url = `/thread/${threadId}/response`; 
console.log("returning last event from endpoint", lastEvent); res.json({ thread_id: threadId, ...newThread }); }); // GET /thread/:id - Get thread status app.get('/thread/:id', (req, res) => { const thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } res.json(thread); }); // POST /thread/:id/response - Handle clarification response app.post('/thread/:id/response', async (req, res) => { let thread = store.get(req.params.id); if (!thread) { return res.status(404).json({ error: "Thread not found" }); } thread.events.push({ type: "human_response", data: req.body.message }); // loop until stop event const newThread = await agentLoop(thread); store.update(req.params.id, newThread); const lastEvent = newThread.events[newThread.events.length - 1]; lastEvent.data.response_url = `/thread/${req.params.id}/response`; console.log("returning last event from endpoint", lastEvent); res.json(newThread); }); const port = process.env.PORT || 3000; app.listen(port, () => { console.log(`Server running on port ${port}`); }); export { app }; ================================================ FILE: workshops/2025-07-16/walkthrough/09-state.ts ================================================ import crypto from 'crypto'; import { Thread } from '../src/agent'; // you can replace this with any simple state management, // e.g. 
redis, sqlite, postgres, etc export class ThreadStore { private threads: Map = new Map(); create(thread: Thread): string { const id = crypto.randomUUID(); this.threads.set(id, thread); return id; } get(id: string): Thread | undefined { return this.threads.get(id); } update(id: string, thread: Thread): void { this.threads.set(id, thread); } } ================================================ FILE: workshops/2025-07-16/walkthrough/10-agent.ts ================================================ import { AddTool, SubtractTool, DivideTool, MultiplyTool, b } from "../baml_client"; export interface Event { type: string data: any; } export class Thread { events: Event[] = []; constructor(events: Event[]) { this.events = events; } serializeForLLM() { return this.events.map(e => this.serializeOneEvent(e)).join("\n"); } trimLeadingWhitespace(s: string) { return s.replace(/^[ \t]+/gm, ''); } serializeOneEvent(e: Event) { return this.trimLeadingWhitespace(` <${e.data?.intent || e.type}> ${ typeof e.data !== 'object' ? 
e.data : Object.keys(e.data).filter(k => k !== 'intent').map(k => `${k}: ${e.data[k]}`).join("\n")} `) } awaitingHumanResponse(): boolean { const lastEvent = this.events[this.events.length - 1]; return ['request_more_information', 'done_for_now'].includes(lastEvent.data.intent); } awaitingHumanApproval(): boolean { const lastEvent = this.events[this.events.length - 1]; return lastEvent.data.intent === 'divide'; } } export type CalculatorTool = AddTool | SubtractTool | MultiplyTool | DivideTool; export async function handleNextStep(nextStep: CalculatorTool, thread: Thread): Promise { let result: number; switch (nextStep.intent) { case "add": result = nextStep.a + nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "subtract": result = nextStep.a - nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "multiply": result = nextStep.a * nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; case "divide": result = nextStep.a / nextStep.b; console.log("tool_response", result); thread.events.push({ "type": "tool_response", "data": result }); return thread; } } export async function agentLoop(thread: Thread): Promise { while (true) { const nextStep = await b.DetermineNextStep(thread.serializeForLLM()); console.log("nextStep", nextStep); thread.events.push({ "type": "tool_call", "data": nextStep }); switch (nextStep.intent) { case "done_for_now": case "request_more_information": // response to human, return the thread return thread; case "divide": // divide is scary, return it for human approval return thread; case "add": case "subtract": case "multiply": thread = await handleNextStep(nextStep, thread); } } } ================================================ FILE: workshops/2025-07-16/walkthrough/10-server.ts 
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});


type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

type ResponsePayload = {
    type: "response";
    response: string;
}

type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text response or an approval decision, depending on
// what the thread's last event is waiting for, then resumes the agent loop.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    let lastEvent = thread.events[thread.events.length - 1];

    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
        // push feedback onto the thread
        // comment is optional; avoid rendering the literal text "undefined"
        thread.events.push({
            type: "tool_response",
            data: `user denied the operation with feedback: "${body.comment ?? ""}"`
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(lastEvent.data, thread);
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    // loop until stop event
    const newThread = await agentLoop(thread);

    store.update(req.params.id, newThread);

    lastEvent = newThread.events[newThread.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(newThread);
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };


================================================
FILE: workshops/2025-07-16/walkthrough/11-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Runs the agent on the message given as command line arguments, asking a
 * human whenever the loop stops on anything other than `done_for_now`.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        thread.events.push(responseEvent);
        newThread = await agentLoop(thread);
        lastEvent = newThread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/** Routes the question to email when HUMANLAYER_API_KEY is set, else to the terminal. */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}

/** Prompts on stdin and resolves with the answer as a `human_response` event. */
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // release stdin so repeated prompts do not stack up interfaces
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Requests approval for a `divide` tool call over email and returns the
 * outcome as a `tool_response` event.
 *
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is not `divide`
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}


================================================
FILE: workshops/2025-07-16/walkthrough/11b-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Runs the agent on the message given as command line arguments, asking a
 * human whenever the loop stops on anything other than `done_for_now`.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        thread.events.push(responseEvent);
        newThread = await agentLoop(thread);
        lastEvent = newThread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/** Routes the question to email when HUMANLAYER_API_KEY is set, else to the terminal. */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}

/** Prompts on stdin and resolves with the answer as a `human_response` event. */
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // release stdin so repeated prompts do not stack up interfaces
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contacts the human over email: a free-text question for
 * `request_more_information`, an approval request for `divide`.
 *
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is neither of those
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // same event type askHumanCLI uses for a human's reply
        return {
            "type": "human_response",
            "data": response
        }
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}


================================================
FILE: workshops/2025-07-16/walkthrough/11c-cli.ts
================================================
// cli.ts lets you invoke the agent loop from the command line

import { humanlayer } from "humanlayer";
import { agentLoop, Thread, Event } from "../src/agent";

/**
 * Runs the agent on the message given as command line arguments, asking a
 * human whenever the loop stops on anything other than `done_for_now`.
 */
export async function cli() {
    // Get command line arguments, skipping the first two (node and script name)
    const args = process.argv.slice(2);

    if (args.length === 0) {
        console.error("Error: Please provide a message as a command line argument");
        process.exit(1);
    }

    // Join all arguments into a single message
    const message = args.join(" ");

    // Create a new thread with the user's message as the initial event
    const thread = new Thread([{ type: "user_input", data: message }]);

    // Run the agent loop with the thread
    let newThread = await agentLoop(thread);
    let lastEvent = newThread.events.slice(-1)[0];

    while (lastEvent.data.intent !== "done_for_now") {
        const responseEvent = await askHuman(lastEvent);
        thread.events.push(responseEvent);
        newThread = await agentLoop(thread);
        lastEvent = newThread.events.slice(-1)[0];
    }

    // print the final result
    // optional - you could loop here too
    console.log(lastEvent.data.message);
    process.exit(0);
}

/** Routes the question to email when HUMANLAYER_API_KEY is set, else to the terminal. */
async function askHuman(lastEvent: Event): Promise<Event> {
    if (process.env.HUMANLAYER_API_KEY) {
        return await askHumanEmail(lastEvent);
    } else {
        return await askHumanCLI(lastEvent.data.message);
    }
}

/** Prompts on stdin and resolves with the answer as a `human_response` event. */
async function askHumanCLI(message: string): Promise<Event> {
    const readline = require('readline').createInterface({
        input: process.stdin,
        output: process.stdout
    });

    return new Promise((resolve) => {
        readline.question(`${message}\n> `, (answer: string) => {
            // release stdin so repeated prompts do not stack up interfaces
            readline.close();
            resolve({ type: "human_response", data: answer });
        });
    });
}

/**
 * Contacts the human over email using a custom body template: a free-text
 * question for `request_more_information`, an approval request for `divide`.
 *
 * @throws Error when HUMANLAYER_EMAIL is unset or the intent is neither of those
 */
export async function askHumanEmail(lastEvent: Event): Promise<Event> {
    if (!process.env.HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }
    const hl = humanlayer({ //reads apiKey from env
        // name of this agent
        runId: "12fa-cli-agent",
        verbose: true,
        contactChannel: {
            // agent should request permission via email
            email: {
                address: process.env.HUMANLAYER_EMAIL,
                // custom email body - jinja
                template: `{% if type == 'request_more_information' %}
{{ event.spec.msg }}
{% else %}
agent {{ event.run_id }} is requesting approval for {{event.spec.fn}}
with args: {{event.spec.kwargs}}

reply to this email to approve
{% endif %}`
            }
        }
    })

    if (lastEvent.data.intent === "request_more_information") {
        // fetch response synchronously - this will block until reply
        const response = await hl.fetchHumanResponse({
            spec: {
                msg: lastEvent.data.message
            }
        })
        // same event type askHumanCLI uses for a human's reply
        return {
            "type": "human_response",
            "data": response
        }
    }

    if (lastEvent.data.intent === "divide") {
        // fetch approval synchronously - this will block until reply
        const response = await hl.fetchHumanApproval({
            spec: {
                fn: "divide",
                kwargs: {
                    a: lastEvent.data.a,
                    b: lastEvent.data.b
                }
            }
        })

        if (response.approved) {
            const result = lastEvent.data.a / lastEvent.data.b;
            console.log("tool_response", result);
            return {
                "type": "tool_response",
                "data": result
            };
        } else {
            return {
                "type": "tool_response",
                "data": `user denied operation ${lastEvent.data.intent} with feedback: ${response.comment}`
            };
        }
    }
    throw new Error(`unknown tool: ${lastEvent.data.intent}`)
}


================================================
FILE: workshops/2025-07-16/walkthrough/12-1-server-init.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

/**
 * Builds a HumanLayer client that contacts HUMANLAYER_EMAIL.
 *
 * @throws Error when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is not set
 */
const getHumanlayer = () => {
    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address: HUMANLAYER_EMAIL }
        }
    });
}

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});


type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

type ResponsePayload = {
    type: "response";
    response: string;
}

type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text response or an approval decision, depending on
// what the thread's last event is waiting for, then resumes the agent loop.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    let lastEvent = thread.events[thread.events.length - 1];

    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
        // push feedback onto the thread
        // comment is optional; avoid rendering the literal text "undefined"
        thread.events.push({
            type: "tool_response",
            data: `user denied the operation with feedback: "${body.comment ?? ""}"`
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(lastEvent.data, thread);
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    // loop until stop event
    const result = await agentLoop(thread);

    store.update(req.params.id, result);

    lastEvent = result.events[result.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(result);
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };


================================================
FILE: workshops/2025-07-16/walkthrough/12-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

/**
 * Builds a HumanLayer client that contacts HUMANLAYER_EMAIL.
 *
 * @throws Error when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is not set
 */
const getHumanlayer = () => {
    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address: HUMANLAYER_EMAIL }
        }
    });
}

// POST /thread - Start new thread
// Responds immediately with the thread id; the agent loop runs in the
// background and its progress can be read from GET /thread/:id.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // register the thread before responding so the caller gets an id to poll
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - returns immediately
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the response has already been sent, so the failure can only be logged
        console.error("agent loop failed for thread", threadId, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});

type WebhookResponse = V1Beta2HumanContactCompleted;

const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - validates a completed human contact against a stored thread.
// NOTE: at this step the reply is validated but not appended to the thread.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // acknowledge so the sender's request does not hang waiting for a reply
    res.json({ status: "ok" });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };


================================================
FILE: workshops/2025-07-16/walkthrough/12a-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

/**
 * Builds a HumanLayer client that contacts HUMANLAYER_EMAIL.
 *
 * @throws Error when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is not set
 */
const getHumanlayer = () => {
    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address: HUMANLAYER_EMAIL }
        }
    });
}

// POST /thread - Start new thread
// Responds immediately with the thread id; the agent loop runs in the
// background and its progress can be read from GET /thread/:id.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // register the thread before responding so the caller gets an id to poll
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - returns immediately
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the response has already been sent, so the failure can only be logged
        console.error("agent loop failed for thread", threadId, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});

type WebhookResponse = V1Beta2HumanContactCompleted;

const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - validates a completed human contact against a stored thread.
// NOTE: at this step the reply is validated but not appended to the thread.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // acknowledge so the sender's request does not hang waiting for a reply
    res.json({ status: "ok" });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };


================================================
FILE: workshops/2025-07-16/walkthrough/12aa-server.ts
================================================
import express, { Request, Response } from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { humanlayer, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

/**
 * Builds a HumanLayer client that contacts HUMANLAYER_EMAIL.
 *
 * @throws Error when HUMANLAYER_EMAIL or HUMANLAYER_API_KEY is not set
 */
const getHumanlayer = () => {
    const HUMANLAYER_EMAIL = process.env.HUMANLAYER_EMAIL;
    if (!HUMANLAYER_EMAIL) {
        throw new Error("missing or invalid parameters: HUMANLAYER_EMAIL");
    }

    const HUMANLAYER_API_KEY = process.env.HUMANLAYER_API_KEY;
    if (!HUMANLAYER_API_KEY) {
        throw new Error("missing or invalid parameters: HUMANLAYER_API_KEY");
    }
    return humanlayer({
        runId: `12fa-agent`,
        contactChannel: {
            email: { address: HUMANLAYER_EMAIL }
        }
    });
}

// POST /thread - Start new thread
// Responds immediately with the thread id; the agent loop runs in the
// background and its progress can be read from GET /thread/:id.
app.post('/thread', async (req: Request, res: Response) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    // register the thread before responding so the caller gets an id to poll
    const threadId = store.create(thread);

    // run agent loop asynchronously, return immediately
    Promise.resolve().then(async () => {
        const newThread = await agentLoop(thread);

        store.update(threadId, newThread);

        const lastEvent = newThread.events[newThread.events.length - 1];

        if (newThread.awaitingHumanResponse()) {
            const hl = getHumanlayer();
            // create a human contact - returns immediately
            await hl.createHumanContact({
                spec: {
                    msg: lastEvent.data.message,
                    state: {
                        thread_id: threadId,
                    }
                }
            });
        }
    }).catch((err) => {
        // the response has already been sent, so the failure can only be logged
        console.error("agent loop failed for thread", threadId, err);
    });

    res.json({ status: "processing", thread_id: threadId });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req: Request, res: Response) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});

type WebhookResponse = V1Beta2HumanContactCompleted;

const handleHumanResponse = async (req: Request, res: Response) => {

}

// POST /webhook - validates a completed human contact against a stored thread.
// NOTE: at this step the reply is validated but not appended to the thread.
app.post('/webhook', async (req: Request, res: Response) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    // acknowledge so the sender's request does not hang waiting for a reply
    res.json({ status: "ok" });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };


================================================
FILE: workshops/2025-07-16/walkthrough/12b-server.ts
================================================
import express from 'express';
import { Thread, agentLoop, handleNextStep } from '../src/agent';
import { ThreadStore } from '../src/state';
import { V1Beta2EmailEventReceived, V1Beta2FunctionCallCompleted, V1Beta2HumanContactCompleted } from 'humanlayer';

const app = express();
app.use(express.json());
app.set('json spaces', 2);

const store = new ThreadStore();

// POST /thread - Start new thread
app.post('/thread', async (req, res) => {
    const thread = new Thread([{
        type: "user_input",
        data: req.body.message
    }]);

    const threadId = store.create(thread);
    const newThread = await agentLoop(thread);

    store.update(threadId, newThread);

    const lastEvent = newThread.events[newThread.events.length - 1];
    // If we exited the loop, include the response URL so the client can
    // push a new message onto the thread
    lastEvent.data.response_url = `/thread/${threadId}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json({
        thread_id: threadId,
        ...newThread
    });
});

// GET /thread/:id - Get thread status
app.get('/thread/:id', (req, res) => {
    const thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }
    res.json(thread);
});


type ApprovalPayload = {
    type: "approval";
    approved: boolean;
    comment?: string;
}

type ResponsePayload = {
    type: "response";
    response: string;
}

type Payload = ApprovalPayload | ResponsePayload;

// POST /thread/:id/response - Handle clarification response
// Accepts either a free-text response or an approval decision, depending on
// what the thread's last event is waiting for, then resumes the agent loop.
app.post('/thread/:id/response', async (req, res) => {
    let thread = store.get(req.params.id);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    const body: Payload = req.body;

    let lastEvent = thread.events[thread.events.length - 1];

    if (thread.awaitingHumanResponse() && body.type === 'response') {
        thread.events.push({
            type: "human_response",
            data: body.response
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && !body.approved) {
        // push feedback onto the thread
        // comment is optional; avoid rendering the literal text "undefined"
        thread.events.push({
            type: "tool_response",
            data: `user denied the operation with feedback: "${body.comment ?? ""}"`
        });
    } else if (thread.awaitingHumanApproval() && body.type === 'approval' && body.approved) {
        // approved, run the tool, pushing results onto the thread
        await handleNextStep(lastEvent.data, thread);
    } else {
        res.status(400).json({
            error: "Invalid request: " + body.type,
            awaitingHumanResponse: thread.awaitingHumanResponse(),
            awaitingHumanApproval: thread.awaitingHumanApproval()
        });
        return;
    }

    // loop until stop event
    const result = await agentLoop(thread);

    store.update(req.params.id, result);

    lastEvent = result.events[result.events.length - 1];
    lastEvent.data.response_url = `/thread/${req.params.id}/response`;

    console.log("returning last event from endpoint", lastEvent);

    res.json(result);
});

type WebhookResponse = V1Beta2HumanContactCompleted;

// POST /webhook/response - records a completed human contact on its thread.
// NOTE: the agent loop is not resumed here; only the reply is recorded.
app.post('/webhook/response', async (req, res) => {
    console.log("webhook response", req.body);
    const response = req.body as WebhookResponse;

    // response is guaranteed to be set on a webhook
    const humanResponse: string = response.event.status?.response as string;

    const threadId = response.event.spec.state?.thread_id;
    if (!threadId) {
        return res.status(400).json({ error: "Thread ID not found" });
    }

    const thread = store.get(threadId);
    if (!thread) {
        return res.status(404).json({ error: "Thread not found" });
    }

    if (!thread.awaitingHumanResponse()) {
        return res.status(400).json({ error: "Thread is not awaiting human response" });
    }

    thread.events.push({
        type: "human_response",
        data: humanResponse
    });
    // persist explicitly so the handler still works if the store is swapped
    // for one that does not share object references
    store.update(threadId, thread);

    // acknowledge so the sender's request does not hang waiting for a reply
    res.json({ status: "ok" });
});

const port = process.env.PORT || 3000;
app.listen(port, () => {
    console.log(`Server running on port ${port}`);
});

export { app };


================================================
FILE: workshops/2025-07-16/walkthrough.yaml
================================================
title: "Building the 12-factor agent template from scratch in Python"
text: "Steps to start from a bare Python repo and build up a 12-factor agent. This walkthrough will guide you through creating a Python agent that follows the 12-factor methodology with BAML."

targets:
  - ipynb: "./build/workshop-2025-07-16.ipynb"

sections:
  - name: hello-world
    title: "Chapter 0 - Hello World"
    text: "Let's start with a basic Python setup and a hello world program."
    steps:
      - text: |
          This guide will walk you through building agents in Python with BAML.

          We'll start simple with a hello world program and gradually build up to a full agent.

          For this notebook, you'll need to have your OpenAI API key saved in Google Colab secrets.

          ## Where We're Headed

          Before we dive in, let's understand the journey ahead. We're building toward **micro-agents in deterministic DAGs** - a powerful pattern that combines the flexibility of AI with the reliability of traditional software.
📖 **Learn more**: [A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) ![Software DAG Evolution](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/010-software-dag.png) - text: "Here's our simple hello world program:" - file: {src: ./walkthrough/00-main.py} - text: "Let's run it to verify it works:" - run_main: {regenerate_baml: false} - name: cli-and-agent title: "Chapter 1 - CLI and Agent Loop" text: "Now let's add BAML and create our first agent with a CLI interface." steps: - text: | In this chapter, we'll integrate BAML to create an AI agent that can respond to user input. ## What is BAML? BAML (Boundary Markup Language) is a domain-specific language designed to help developers build reliable AI workflows and agents. Created by [BoundaryML](https://www.boundaryml.com/) (a Y Combinator W23 company), BAML adds the engineering to prompt engineering. ### Why BAML? - **Type-safe outputs**: Get fully type-safe outputs from LLMs, even when streaming - **Language agnostic**: Works with Python, TypeScript, Ruby, Go, and more - **LLM agnostic**: Works with any LLM provider (OpenAI, Anthropic, etc.) 
- **Better performance**: State-of-the-art structured outputs that outperform even OpenAI's native function calling - **Developer-friendly**: Native VSCode extension with syntax highlighting, autocomplete, and interactive playground ### Learn More - 📚 [Official Documentation](https://docs.boundaryml.com/home) - 💻 [GitHub Repository](https://github.com/BoundaryML/baml) - 🎯 [What is BAML?](https://docs.boundaryml.com/guide/introduction/what-is-baml) - 📖 [BAML Examples](https://github.com/BoundaryML/baml-examples) - 🏢 [Company Website](https://www.boundaryml.com/) - 📰 [Blog: AI Agents Need a New Syntax](https://www.boundaryml.com/blog/ai-agents-need-new-syntax) BAML turns prompt engineering into schema engineering, where you focus on defining the structure of your data rather than wrestling with prompts. This approach leads to more reliable and maintainable AI applications. ### Note on Developer Experience BAML works much better in VS Code with their official extension, which provides syntax highlighting, autocomplete, inline testing, and an interactive playground. However, for this notebook tutorial, we'll work with BAML files directly without the enhanced IDE features. ## Factor 1: Natural Language to Tool Calls What we're building implements the first factor of 12-factor agents - converting natural language into structured tool calls. 📖 **Learn more**: [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) ![Natural Language to Tool Calls](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/110-natural-language-tool-calls.png) First, let's set up BAML support in our notebook. - baml_setup: true - command: "!ls baml_src" - text: | Now let's create our agent that will use BAML to process user input. 
First, we'll define the core agent logic: - file: {src: ./walkthrough/01-agent.py} - text: | Next, we need to define the BAML function that our agent will use. ### Understanding BAML Syntax BAML files define: - **Classes**: Structured output schemas (like `DoneForNow` below) - **Functions**: AI-powered functions that take inputs and return structured outputs - **Tests**: Example inputs/outputs to validate your prompts This BAML file defines what our agent can do: - fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml} - command: "!ls baml_src" - text: | Now let's create our main function that accepts a message parameter: - file: {src: ./walkthrough/01-main.py} - text: | Let's test our agent! Try calling main() with different messages: - `main("What's the weather like?")` - `main("Tell me a joke")` - `main("How are you doing today?")` In this case, we'll use the `baml_generate` function to generate the Pydantic and Python bindings from our BAML source. In later chapters we'll skip this step, since `get_baml_client()` does it automatically. - run_main: {regenerate_baml: true, args: "Hello from the Python notebook!"} - name: calculator-tools title: "Chapter 2 - Add Calculator Tools" text: "Let's add some calculator tools to our agent." steps: - text: | Let's start by adding a tool definition for the calculator. These are simple structured outputs that we'll ask the model to return as a "next step" in the agentic loop. ## Factor 4: Tools Are Structured Outputs This chapter demonstrates that tools are just structured JSON outputs from the LLM - nothing more complex!
📖 **Learn more**: [Factor 4: Tools Are Structured Outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) ![Tools Are Structured Outputs](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/140-tools-are-just-structured-outputs.png) - fetch_file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml} - command: "!ls baml_src" - text: | Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps. - fetch_file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml} - text: | Now let's update our main function to show the tool call: - file: {src: ./walkthrough/02-main.py} - text: | Let's try out the calculator! The agent should recognize that you want to perform a calculation and return the appropriate tool call instead of just a message. - run_main: {regenerate_baml: false, args: "can you add 3 and 4"} - name: tool-loop title: "Chapter 3 - Process Tool Calls in a Loop" text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM." steps: - text: | In this chapter, we'll enhance our agent to process tool calls in a loop. This means: - The agent can call multiple tools in sequence - Each tool result is fed back to the agent - The agent continues until it has a final answer ## The Agent Loop Pattern We're implementing the core agent loop - where the AI makes decisions, executes tools, and continues until done. ![Agent Loop Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/027-agent-loop-animation.gif) ## Factor 5: Unify Execution State Notice how we're storing everything as events in our Thread - this is Factor 5 in action! 
📖 **Learn more**: [Factor 5: Unify Execution State](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) ![Unify State Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/155-unify-state-animation.gif) Let's update our agent to handle tool calls properly: - file: {src: ./walkthrough/03-agent.py} - text: | Now let's update our main function to use the new agent loop: - file: {src: ./walkthrough/03-main.py} - text: | Let's try it out! The agent should now call the tool and return the calculated result: - run_main: {regenerate_baml: false, args: "can you add 3 and 4"} - text: | You should see the agent: 1. Recognize it needs to use the add tool 2. Call the tool with the correct parameters 3. Get the result (7) 4. Generate a final response incorporating the result For more complex calculations, we need to handle all calculator operations. Let's add support for subtract, multiply, and divide: - file: {src: ./walkthrough/03b-agent.py} - text: | Now let's test subtraction: - run_main: {regenerate_baml: false, args: "can you subtract 3 from 4"} - text: | Test multiplication: - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4"} - text: | Finally, let's test a complex multi-step calculation: - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result"} - text: | Congratulations! You've taken your first step into hand-rolling an agent loop. Key concepts you've learned: - **Thread Management**: Tracking conversation history and tool calls - **Tool Execution**: Processing different tool types and returning results - **Agent Loop**: Continuing until the agent has a final answer From here, we'll start incorporating more intermediate and advanced concepts for 12-factor agents. - name: baml-tests title: "Chapter 4 - Add Tests to agent.baml" text: "Let's add some tests to our BAML agent." 
steps: - text: | In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly. ## Why Test BAML Functions? - **Catch regressions**: Ensure changes don't break existing behavior - **Document behavior**: Tests serve as living documentation - **Validate edge cases**: Test complex scenarios and conversation flows - **CI/CD integration**: Run tests automatically in your pipeline Let's start with a simple test that checks the agent's ability to handle basic interactions: - fetch_file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml} - text: | Run the tests to see them in action: - command: "!baml-cli test" - text: | Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output. ## BAML Assertion Syntax Assertions use the `@@assert` directive: ``` @@assert(name, {{condition}}) ``` - `name`: A descriptive name for the assertion - `condition`: A boolean expression using `this` to access the output - fetch_file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml} - text: | Run the tests again to see assertions in action: - command: "!baml-cli test" - text: | Finally, let's add more complex test cases that test multi-step conversations. These tests simulate an entire conversation flow, including: - User input - Tool calls made by the agent - Tool responses - Final agent response - fetch_file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml} - text: | Run the comprehensive test suite: - command: "!baml-cli test" - text: | ## Key Testing Concepts 1. **Test Structure**: Each test specifies functions, arguments, and assertions 2. **Progressive Testing**: Start simple, then test complex scenarios 3. **Conversation History**: Test how the agent handles multi-turn conversations 4. 
**Tool Integration**: Verify the agent correctly uses tools in sequence With these tests in place, you can confidently modify your agent knowing that core functionality is protected by automated tests! - name: human-tools title: "Chapter 5 - Multiple Human Tools" text: | In this section, we'll add support for multiple tools that serve to contact humans. steps: - text: | So far, our agent only returns a final answer with "done_for_now". But what if the agent needs clarification? Let's add a new tool that allows the agent to request more information from the user. ## Why Human-in-the-Loop? - **Handle ambiguous inputs**: When user input is unclear or contains typos - **Request missing information**: When the agent needs more context - **Confirm sensitive operations**: Before performing important actions - **Interactive workflows**: Build conversational agents that engage users First, let's update our BAML file to include a ClarificationRequest tool: - fetch_file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml} - text: | Now let's update our agent to handle clarification requests: - file: {src: ./walkthrough/05-agent.py} - text: | Finally, let's create a main function that handles human interaction: - file: {src: ./walkthrough/05-main.py} - text: | Let's test with an ambiguous input that should trigger a clarification request: - run_main: {regenerate_baml: false, args: "can you multiply 3 and FD*(#F&&"} - text: | You should see: 1. The agent recognizes the input is unclear 2. It asks for clarification 3. In Colab, you'll be prompted to type a response 4. In local testing, an auto-response is provided 5. The agent continues with the clarified input ## Interactive Testing in Colab When running in Google Colab, the `input()` function will create an interactive text box where you can type your response. Try different clarifications to see how the agent adapts! 
## Key Concepts - **Human Tools**: Special tool types that return control to the human - **Conversation Flow**: The agent can pause execution to get human input - **Context Preservation**: The full conversation history is maintained - **Flexible Handling**: Different behaviors for different environments - name: customize-prompt title: "Chapter 6 - Customize Your Prompt with Reasoning" text: | In this section, we'll explore how to customize the prompt of the agent with reasoning steps. This is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md) steps: - text: | ## Why Add Reasoning to Prompts? Adding explicit reasoning steps to your prompts can significantly improve agent performance: - **Better decisions**: The model thinks through problems step-by-step - **Transparency**: You can see the model's thought process - **Fewer errors**: Structured thinking reduces mistakes - **Debugging**: Easier to identify where reasoning went wrong Let's update our agent prompt to include a reasoning step: - fetch_file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml} - text: | Now let's test it with a simple calculation to see the reasoning in action: - run_main: {args: "can you multiply 3 and 4"} - text: | The model uses explicit reasoning steps to think through the problem before making a decision. ## Advanced Prompt Engineering You can enhance your prompts further by: - Adding specific reasoning templates for different tasks - Including examples of good reasoning - Structuring the reasoning with numbered steps - Adding checks for common mistakes The key is to guide the model's thinking process while still allowing flexibility. - name: context-window title: "Chapter 7 - Customize Your Context Window" text: | In this section, we'll explore how to customize the context window of the agent. 
This is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md) steps: - text: | ## Context Window Serialization How you format your conversation history can significantly impact: - **Token usage**: Some formats are more efficient - **Model understanding**: Clear structure helps the model - **Debugging**: Readable formats help development Let's implement two serialization formats: pretty-printed JSON and XML. - file: {src: ./walkthrough/07-agent.py} - text: | Now let's create a main function that can switch between formats: - file: {src: ./walkthrough/07-main.py} - text: | Let's test with JSON format first: - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: false}} - text: | Now let's try the same with XML format: - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: true}} - text: | ## XML vs JSON Trade-offs **XML Benefits**: - More token-efficient for nested data - Clear hierarchy with opening/closing tags - Better for long conversations **JSON Benefits**: - Familiar to most developers - Easy to parse and debug - Native to JavaScript/Python Choose based on your specific needs and token constraints! ================================================ FILE: workshops/2025-07-16/walkthrough_python_enhanced.yaml ================================================ title: "Building the 12-factor agent template from scratch in Python" text: "Steps to start from a bare Python repo and build up a 12-factor agent. This walkthrough will guide you through creating a Python agent that follows the 12-factor methodology with BAML." targets: - ipynb: "./build/workshop-2025-07-16.ipynb" sections: - name: hello-world title: "Chapter 0 - Hello World" text: "Let's start with a basic Python setup and a hello world program." 
steps: - text: | This guide will walk you through building agents in Python with BAML. We'll start simple with a hello world program and gradually build up to a full agent. For this notebook, you'll need to have your OpenAI API key saved in Google Colab secrets. ## Where We're Headed Before we dive in, let's understand the journey ahead. We're building toward **micro-agents in deterministic DAGs** - a powerful pattern that combines the flexibility of AI with the reliability of traditional software. 📖 **Learn more**: [A Brief History of Software](https://github.com/humanlayer/12-factor-agents/blob/main/content/brief-history-of-software.md) ![Software DAG Evolution](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/010-software-dag.png) - text: "Here's our simple hello world program:" - file: {src: ./walkthrough/00-main.py} - text: "Let's run it to verify it works:" - run_main: {regenerate_baml: false} - name: cli-and-agent title: "Chapter 1 - CLI and Agent Loop" text: "Now let's add BAML and create our first agent with a CLI interface." steps: - text: | In this chapter, we'll integrate BAML to create an AI agent that can respond to user input. ## What is BAML? BAML (Boundary Markup Language) is a domain-specific language designed to help developers build reliable AI workflows and agents. Created by [BoundaryML](https://www.boundaryml.com/) (a Y Combinator W23 company), BAML adds the engineering to prompt engineering. ### Why BAML? - **Type-safe outputs**: Get fully type-safe outputs from LLMs, even when streaming - **Language agnostic**: Works with Python, TypeScript, Ruby, Go, and more - **LLM agnostic**: Works with any LLM provider (OpenAI, Anthropic, etc.) 
- **Better performance**: State-of-the-art structured outputs that outperform even OpenAI's native function calling - **Developer-friendly**: Native VSCode extension with syntax highlighting, autocomplete, and interactive playground ### Learn More - 📚 [Official Documentation](https://docs.boundaryml.com/home) - 💻 [GitHub Repository](https://github.com/BoundaryML/baml) - 🎯 [What is BAML?](https://docs.boundaryml.com/guide/introduction/what-is-baml) - 📖 [BAML Examples](https://github.com/BoundaryML/baml-examples) - 🏢 [Company Website](https://www.boundaryml.com/) - 📰 [Blog: AI Agents Need a New Syntax](https://www.boundaryml.com/blog/ai-agents-need-new-syntax) BAML turns prompt engineering into schema engineering, where you focus on defining the structure of your data rather than wrestling with prompts. This approach leads to more reliable and maintainable AI applications. ### Note on Developer Experience BAML works much better in VS Code with their official extension, which provides syntax highlighting, autocomplete, inline testing, and an interactive playground. However, for this notebook tutorial, we'll work with BAML files directly without the enhanced IDE features. ## Factor 1: Natural Language to Tool Calls What we're building implements the first factor of 12-factor agents - converting natural language into structured tool calls. 📖 **Learn more**: [Factor 1: Natural Language to Tool Calls](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-01-natural-language-to-tool-calls.md) ![Natural Language to Tool Calls](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/110-natural-language-tool-calls.png) First, let's set up BAML support in our notebook. - baml_setup: true - text: | Now let's create our agent that will use BAML to process user input. First, we'll define the core agent logic: - file: {src: ./walkthrough/01-agent.py} - text: | Next, we need to define the BAML function that our agent will use. 
### Understanding BAML Syntax BAML files define: - **Classes**: Structured output schemas (like `DoneForNow` below) - **Functions**: AI-powered functions that take inputs and return structured outputs - **Tests**: Example inputs/outputs to validate your prompts This BAML file defines what our agent can do: - fetch_file: {src: ./walkthrough/01-agent.baml, dest: baml_src/agent.baml} - text: | Now let's create our main function that accepts a message parameter: - file: {src: ./walkthrough/01-main.py} - text: | Let's test our agent! Try calling main() with different messages: - `main("What's the weather like?")` - `main("Tell me a joke")` - `main("How are you doing today?")` - run_main: {regenerate_baml: true, args: "Hello from the Python notebook!"} - name: calculator-tools title: "Chapter 2 - Add Calculator Tools" text: "Let's add some calculator tools to our agent." steps: - text: | Let's start by adding a tool definition for the calculator. These are simple structured outputs that we'll ask the model to return as a "next step" in the agentic loop. ## Factor 4: Tools Are Structured Outputs This chapter demonstrates that tools are just structured JSON outputs from the LLM - nothing more complex! 📖 **Learn more**: [Factor 4: Tools Are Structured Outputs](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-04-tools-are-structured-outputs.md) ![Tools Are Structured Outputs](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/140-tools-are-just-structured-outputs.png) - fetch_file: {src: ./walkthrough/02-tool_calculator.baml, dest: baml_src/tool_calculator.baml} - text: | Now, let's update the agent's DetermineNextStep method to expose the calculator tools as potential next steps. - fetch_file: {src: ./walkthrough/02-agent.baml, dest: baml_src/agent.baml} - text: | Now let's update our main function to show the tool call: - file: {src: ./walkthrough/02-main.py} - text: | Let's try out the calculator! 
The agent should recognize that you want to perform a calculation and return the appropriate tool call instead of just a message. - run_main: {regenerate_baml: true, args: "can you add 3 and 4"} - name: tool-loop title: "Chapter 3 - Process Tool Calls in a Loop" text: "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM." steps: - text: | In this chapter, we'll enhance our agent to process tool calls in a loop. This means: - The agent can call multiple tools in sequence - Each tool result is fed back to the agent - The agent continues until it has a final answer ## The Agent Loop Pattern We're implementing the core agent loop - where the AI makes decisions, executes tools, and continues until done. ![Agent Loop Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/027-agent-loop-animation.gif) ## Factor 5: Unify Execution State Notice how we're storing everything as events in our Thread - this is Factor 5 in action! 📖 **Learn more**: [Factor 5: Unify Execution State](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-05-unify-execution-state.md) ![Unify State Animation](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/155-unify-state-animation.gif) Let's update our agent to handle tool calls properly: - file: {src: ./walkthrough/03-agent.py} - text: | Now let's update our main function to use the new agent loop: - file: {src: ./walkthrough/03-main.py} - text: | Let's try it out! The agent should now call the tool and return the calculated result: - run_main: {regenerate_baml: true, args: "can you add 3 and 4"} - text: | You should see the agent: 1. Recognize it needs to use the add tool 2. Call the tool with the correct parameters 3. Get the result (7) 4. Generate a final response incorporating the result For more complex calculations, we need to handle all calculator operations. 
Let's add support for subtract, multiply, and divide: - file: {src: ./walkthrough/03b-agent.py} - text: | Now let's test subtraction: - run_main: {regenerate_baml: false, args: "can you subtract 3 from 4"} - text: | Test multiplication: - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4"} - text: | Finally, let's test a complex multi-step calculation: - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result"} - text: | Congratulations! You've taken your first step into hand-rolling an agent loop. Key concepts you've learned: - **Thread Management**: Tracking conversation history and tool calls - **Tool Execution**: Processing different tool types and returning results - **Agent Loop**: Continuing until the agent has a final answer From here, we'll start incorporating more intermediate and advanced concepts for 12-factor agents. - name: baml-tests title: "Chapter 4 - Add Tests to agent.baml" text: "Let's add some tests to our BAML agent." steps: - text: | In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly. ## Why Test BAML Functions? - **Catch regressions**: Ensure changes don't break existing behavior - **Document behavior**: Tests serve as living documentation - **Validate edge cases**: Test complex scenarios and conversation flows - **CI/CD integration**: Run tests automatically in your pipeline ## Factor 2: Own Your Prompts Testing is a key part of owning your prompts - you need to verify they work as expected! 
📖 **Learn more**: [Factor 2: Own Your Prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) ![Own Your Prompts](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/120-own-your-prompts.png) Let's start with a simple test that checks the agent's ability to handle basic interactions: - fetch_file: {src: ./walkthrough/04-agent.baml, dest: baml_src/agent.baml} - text: | Run the tests to see them in action: - command: "!baml-cli test" - text: | Now let's improve the tests with assertions! Assertions let you verify specific properties of the agent's output. ## BAML Assertion Syntax Assertions use the `@@assert` directive: ``` @@assert(name, {{condition}}) ``` - `name`: A descriptive name for the assertion - `condition`: A boolean expression using `this` to access the output - fetch_file: {src: ./walkthrough/04b-agent.baml, dest: baml_src/agent.baml} - text: | Run the tests again to see assertions in action: - command: "!baml-cli test" - text: | Finally, let's add more complex test cases that test multi-step conversations. These tests simulate an entire conversation flow, including: - User input - Tool calls made by the agent - Tool responses - Final agent response - fetch_file: {src: ./walkthrough/04c-agent.baml, dest: baml_src/agent.baml} - text: | Run the comprehensive test suite: - command: "!baml-cli test" - text: | ## Key Testing Concepts 1. **Test Structure**: Each test specifies functions, arguments, and assertions 2. **Progressive Testing**: Start simple, then test complex scenarios 3. **Conversation History**: Test how the agent handles multi-turn conversations 4. **Tool Integration**: Verify the agent correctly uses tools in sequence With these tests in place, you can confidently modify your agent knowing that core functionality is protected by automated tests! 
- name: human-tools title: "Chapter 5 - Multiple Human Tools" text: | In this section, we'll add support for multiple tools that serve to contact humans. steps: - text: | So far, our agent only returns a final answer with "done_for_now". But what if the agent needs clarification? Let's add a new tool that allows the agent to request more information from the user. ## Why Human-in-the-Loop? - **Handle ambiguous inputs**: When user input is unclear or contains typos - **Request missing information**: When the agent needs more context - **Confirm sensitive operations**: Before performing important actions - **Interactive workflows**: Build conversational agents that engage users ## Factor 7: Contact Humans with Tools This is a critical pattern - treating human interaction as just another tool call! 📖 **Learn more**: [Factor 7: Contact Humans with Tools](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-07-contact-humans-with-tools.md) ![Contact Humans with Tools](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/170-contact-humans-with-tools.png) This enables **outer-loop agents** - agents that can pause execution and wait for human input: ![Outer Loop Agents](https://raw.githubusercontent.com/humanlayer/12-factor-agents/main/img/175-outer-loop-agents.png) First, let's update our BAML file to include a ClarificationRequest tool: - fetch_file: {src: ./walkthrough/05-agent.baml, dest: baml_src/agent.baml} - text: | Now let's update our agent to handle clarification requests: - file: {src: ./walkthrough/05-agent.py} - text: | Finally, let's create a main function that handles human interaction: - file: {src: ./walkthrough/05-main.py} - text: | Let's test with an ambiguous input that should trigger a clarification request: - run_main: {regenerate_baml: true, args: "can you multiply 3 and FD*(#F&&"} - text: | You should see: 1. The agent recognizes the input is unclear 2. It asks for clarification 3. 
In Colab, you'll be prompted to type a response 4. In local testing, an auto-response is provided 5. The agent continues with the clarified input ## Interactive Testing in Colab When running in Google Colab, the `input()` function will create an interactive text box where you can type your response. Try different clarifications to see how the agent adapts! ## Key Concepts - **Human Tools**: Special tool types that return control to the human - **Conversation Flow**: The agent can pause execution to get human input - **Context Preservation**: The full conversation history is maintained - **Flexible Handling**: Different behaviors for different environments - name: customize-prompt title: "Chapter 6 - Customize Your Prompt with Reasoning" text: | In this section, we'll explore how to customize the prompt of the agent with reasoning steps. This is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md) steps: - text: | ## Why Add Reasoning to Prompts? Adding explicit reasoning steps to your prompts can significantly improve agent performance: - **Better decisions**: The model thinks through problems step-by-step - **Transparency**: You can see the model's thought process - **Fewer errors**: Structured thinking reduces mistakes - **Debugging**: Easier to identify where reasoning went wrong ## Factor 2: Own Your Prompts This chapter demonstrates taking full control of your prompts - they're first-class code! 
📖 **Learn more**: [Factor 2: Own Your Prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-02-own-your-prompts.md) Let's update our agent prompt to include a reasoning step: - fetch_file: {src: ./walkthrough/06-agent.baml, dest: baml_src/agent.baml} - text: | Now let's test it with a simple calculation to see the reasoning in action: - run_main: {regenerate_baml: true, args: "can you multiply 3 and 4"} - text: | You should notice in the BAML logs (if enabled) that the model now includes reasoning steps before deciding what to do. ## Advanced Prompt Engineering You can enhance your prompts further by: - Adding specific reasoning templates for different tasks - Including examples of good reasoning - Structuring the reasoning with numbered steps - Adding checks for common mistakes The key is to guide the model's thinking process while still allowing flexibility. - name: context-window title: "Chapter 7 - Customize Your Context Window" text: | In this section, we'll explore how to customize the context window of the agent. This is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md) steps: - text: | ## Context Window Serialization How you format your conversation history can significantly impact: - **Token usage**: Some formats are more efficient - **Model understanding**: Clear structure helps the model - **Debugging**: Readable formats help development ## Factor 3: Own Your Context Window Context engineering is everything! This is one of the most important factors. 📖 **Learn more**: [Factor 3: Own Your Context Window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-03-own-your-context-window.md) Let's implement two serialization formats: pretty-printed JSON and XML. 
- file: {src: ./walkthrough/07-agent.py} - text: | Now let's create a main function that can switch between formats: - file: {src: ./walkthrough/07-main.py} - text: | Let's test with JSON format first: - run_main: {regenerate_baml: true, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: false}} - text: | Now let's try the same with XML format: - run_main: {regenerate_baml: false, args: "can you multiply 3 and 4, then divide the result by 2", kwargs: {use_xml: true}} - text: | ## XML vs JSON Trade-offs **XML Benefits**: - More token-efficient for nested data - Clear hierarchy with opening/closing tags - Better for long conversations **JSON Benefits**: - Familiar to most developers - Easy to parse and debug - Native to JavaScript/Python Choose based on your specific needs and token constraints! ## What's Next? In the remaining chapters (8-12), we'll build on these foundations to add: - **API endpoints** for serving your agent - **State persistence** with async operations - **Human approval workflows** (Factor 8: Own Your Control Flow) - **Email-based approvals** via HumanLayer - **Webhook integration** for launch/pause/resume patterns (Factor 6) Each step brings us closer to production-ready agents that can handle real-world complexity! 
def create_baml_setup_cells(nb):
    """Append the BAML bootstrap cells to ``nb``.

    Four cells are added, in order:

    1. a markdown cell explaining the setup,
    2. a code cell that pip-installs ``baml-py`` and ``pydantic``,
    3. a code cell defining the ``baml_generate()`` and ``get_baml_client()``
       helpers used by later notebook cells,
    4. a code cell that runs ``baml-cli init``.

    Args:
        nb: Notebook object whose ``cells`` list receives the new cells.
    """
    intro_markdown = """### BAML Setup

Don't worry too much about this setup code - it will make sense later! For now, just know that:
- BAML is a tool for working with language models
- We need some special setup code to make it work nicely in Google Colab
- The `get_baml_client()` function will be used to interact with AI models"""

    # Pinned install command for the BAML runtime plus pydantic.
    pip_install = "!pip install baml-py==0.202.0 pydantic"

    # Source of the helper cell. It is emitted verbatim into the notebook, so
    # the doubled backslashes below become "\n" escapes in the generated code.
    helper_source = '''import subprocess
import os

# Try to import Google Colab userdata, but don't fail if not in Colab
try:
    from google.colab import userdata
    IN_COLAB = True
except ImportError:
    IN_COLAB = False

def baml_generate():
    try:
        result = subprocess.run(
            ["baml-cli", "generate"],
            check=True,
            capture_output=True,
            text=True
        )
        if result.stdout:
            print("[baml-cli generate]\\n", result.stdout)
        if result.stderr:
            print("[baml-cli generate]\\n", result.stderr)
    except subprocess.CalledProcessError as e:
        msg = (
            f"`baml-cli generate` failed with exit code {e.returncode}\\n"
            f"--- STDOUT ---\\n{e.stdout}\\n"
            f"--- STDERR ---\\n{e.stderr}"
        )
        raise RuntimeError(msg) from None

def get_baml_client():
    """
    a bunch of fun jank to work around the google colab import cache
    """
    # Set API key from Colab secrets or environment
    if IN_COLAB:
        os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
    elif 'OPENAI_API_KEY' not in os.environ:
        print("Warning: OPENAI_API_KEY not set. Please set it in your environment.")
    
    baml_generate()
    
    # Force delete all baml_client modules from sys.modules
    import sys
    modules_to_delete = [key for key in sys.modules.keys() if key.startswith('baml_client')]
    for module in modules_to_delete:
        del sys.modules[module]
    
    # Now import fresh
    import baml_client
    return baml_client.sync_client.b
'''

    # Scaffolds the baml_src directory inside the notebook environment.
    baml_init = "!baml-cli init"

    # Cells are created and appended in the same order as before:
    # explanation, install, helpers, init.
    nb.cells.extend([
        new_markdown_cell(intro_markdown),
        new_code_cell(pip_install),
        new_code_cell(helper_source),
        new_code_cell(baml_init),
    ])
def process_step(nb, step, base_path, current_functions, section_name=None):
    """Append the notebook cells described by one walkthrough step to ``nb``.

    ``step`` is a mapping parsed from the walkthrough YAML. Every recognised
    key that is present is handled, in this order:

    * ``text``       -> markdown cell containing the text.
    * ``baml_setup`` -> the bootstrap cells from ``create_baml_setup_cells``.
    * ``file``       -> for ``.py`` sources, a code cell holding the file
                        content under a ``# <src>`` header, or a markdown
                        error cell when the file does not exist.
    * ``fetch_file`` -> ``!curl`` cell downloading ``src`` from the GitHub
                        repository into ``dest`` and printing it.
    * ``dir``        -> ``!mkdir -p`` cell.
    * ``command``    -> shell cell (``!`` is prepended when missing).
    * ``run_main``   -> optional ``baml_generate()`` cell followed by a
                        ``main(...)`` call cell built from ``args``/``kwargs``.

    Args:
        nb: Notebook object whose ``cells`` list is appended to.
        step: Step mapping from the walkthrough file.
        base_path: ``pathlib.Path`` of the directory containing the YAML file.
        current_functions: Unused here; kept so existing callers keep working.
        section_name: Unused here; kept so existing callers keep working.
    """
    # Function-scope stdlib imports keep this block self-contained.
    import json
    import shlex

    def as_str_literal(value):
        # json.dumps yields a double-quoted literal with quotes, backslashes
        # and control characters escaped; that is also a valid Python string
        # literal. Plain text renders exactly as the former f'"{value}"' did.
        return json.dumps(str(value), ensure_ascii=False)

    if 'text' in step:
        nb.cells.append(new_markdown_cell(step['text']))

    if 'baml_setup' in step:
        create_baml_setup_cells(nb)

    if 'file' in step:
        src = step['file']['src']

        # Only Python sources are inlined into the notebook.
        if src.endswith('.py'):
            # NOTE(review): "./"-prefixed paths are resolved against the PARENT
            # of base_path, other paths against base_path itself. Preserved
            # as-is; confirm against where the walkthrough YAML actually lives.
            if src.startswith('./'):
                file_path = base_path.parent / src[2:]
            else:
                file_path = base_path / src

            if file_path.exists():
                # Walkthrough sources contain non-ASCII characters (emoji), so
                # decode explicitly instead of relying on the locale default.
                content = file_path.read_text(encoding='utf-8')
                # Filename header makes the origin of the cell visible.
                nb.cells.append(new_code_cell(f"# {src}\n{content}"))
            else:
                print(f"Warning: File not found: {file_path}")
                nb.cells.append(new_markdown_cell(f"**Error: File not found: {src}**"))
        else:
            # Previously dropped without a trace; surface it to the author.
            print(f"Warning: Skipping non-Python file step: {src}")

    if 'fetch_file' in step:
        src = step['fetch_file']['src']
        dest = step['fetch_file']['dest']
        github_url = f"https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/{src}"
        # shlex.quote leaves ordinary paths/URLs untouched and only adds
        # quoting when a value contains shell-significant characters.
        quoted_dest = shlex.quote(dest)
        command = f"!curl -fsSL -o {quoted_dest} {shlex.quote(github_url)} && cat {quoted_dest}"
        nb.cells.append(new_code_cell(command))

    if 'dir' in step:
        path = step['dir']['path']
        nb.cells.append(new_code_cell(f"!mkdir -p {shlex.quote(path)}"))

    if 'command' in step:
        command = step['command'].strip()
        # Notebook shell escapes start with "!".
        if not command.startswith('!'):
            command = f"!{command}"
        nb.cells.append(new_code_cell(command))

    if 'run_main' in step:
        # A bare `run_main:` key parses to None; treat it as "no options".
        run_main = step['run_main'] or {}

        if run_main.get('regenerate_baml', False):
            nb.cells.append(new_code_cell("baml_generate()"))

        call_parts = []

        # Positional message argument, if any.
        args = run_main.get('args', '')
        if args:
            call_parts.append(as_str_literal(args))

        # Keyword arguments: strings are emitted as escaped literals, anything
        # else (bools, numbers, ...) via its normal string form.
        for key, value in (run_main.get('kwargs') or {}).items():
            if isinstance(value, str):
                call_parts.append(f'{key}={as_str_literal(value)}')
            else:
                call_parts.append(f'{key}={value}')

        # An empty call_parts list naturally yields "main()".
        nb.cells.append(new_code_cell(f'main({", ".join(call_parts)})'))
def convert_walkthrough_to_notebook(yaml_path, output_path):
    """Convert a walkthrough YAML file into a Jupyter notebook file.

    The notebook starts with the walkthrough title as an H1 markdown cell and
    the optional top-level ``text`` as a second markdown cell. Each section
    then contributes an H2 title cell, an optional description cell, and the
    cells produced by ``process_step`` for each of its steps.

    Args:
        yaml_path: Path of the walkthrough YAML file to read.
        output_path: Path of the ``.ipynb`` file to write.

    Raises:
        ValueError: If the YAML document's top level is not a mapping.
    """
    # Walkthrough text contains non-ASCII characters (emoji), so read it as
    # UTF-8 explicitly rather than with the platform's locale encoding.
    with open(yaml_path, 'r', encoding='utf-8') as f:
        walkthrough = yaml.safe_load(f)

    # An empty YAML file parses to None; treat it as an empty walkthrough.
    if walkthrough is None:
        walkthrough = {}
    elif not isinstance(walkthrough, dict):
        raise ValueError(
            f"Expected a mapping at the top level of {yaml_path}, "
            f"got {type(walkthrough).__name__}"
        )

    nb = new_notebook()

    # Title and optional description.
    title = walkthrough.get('title', 'Walkthrough')
    nb.cells.append(new_markdown_cell(f"# {title}"))
    if 'text' in walkthrough:
        nb.cells.append(new_markdown_cell(walkthrough['text']))

    # Relative `file` sources in steps are resolved from the YAML's directory.
    base_path = Path(yaml_path).parent
    current_functions = {}

    # `sections:` / `steps:` written with no value parse to None, hence `or []`.
    for section in walkthrough.get('sections') or []:
        section_title = section.get('title', section.get('name', 'Section'))
        section_name = section.get('name', '')
        nb.cells.append(new_markdown_cell(f"## {section_title}"))

        if 'text' in section:
            nb.cells.append(new_markdown_cell(section['text']))

        for step in section.get('steps') or []:
            process_step(nb, step, base_path, current_functions, section_name)

    # Cell text may contain non-ASCII characters, so write as UTF-8 as well.
    with open(output_path, 'w', encoding='utf-8') as f:
        nbformat.write(nb, f)

    print(f"Generated notebook: {output_path}")
def main():
    """Command-line entry point: parse the arguments and run the conversion.

    Accepts one positional argument (the walkthrough YAML path) and an
    optional ``-o`` / ``--output`` notebook path defaulting to
    ``output.ipynb``.
    """
    cli = argparse.ArgumentParser(
        description='Convert walkthrough.yaml to Jupyter notebook',
    )

    # (flags, settings) pairs, registered in declaration order.
    option_specs = (
        (('yaml_file',), {'help': 'Path to walkthrough.yaml'}),
        (('-o', '--output'), {'default': 'output.ipynb', 'help': 'Output notebook file'}),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()
    convert_walkthrough_to_notebook(options.yaml_file, options.output)
] }, { "cell_type": "markdown", "id": "4e464227", "metadata": {}, "source": [ "This guide will walk you through building agents in Python with BAML.\n", "\n", "We'll start simple with a hello world program and gradually build up to a full agent.\n", "\n", "For this notebook, you'll need to have your OpenAI API key saved in Google Colab secrets.\n" ] }, { "cell_type": "markdown", "id": "99dac1bb", "metadata": {}, "source": [ "Here's our simple hello world program:" ] }, { "cell_type": "code", "execution_count": null, "id": "9c6946fd", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/00-main.py\n", "def hello():\n", " print('hello, world!')\n", "\n", "def main():\n", " hello()" ] }, { "cell_type": "markdown", "id": "5523efac", "metadata": {}, "source": [ "Let's run it to verify it works:" ] }, { "cell_type": "code", "execution_count": null, "id": "6a437eb2", "metadata": {}, "outputs": [], "source": [ "main()" ] }, { "cell_type": "markdown", "id": "d9aa0df6", "metadata": {}, "source": [ "## Chapter 1 - CLI and Agent Loop" ] }, { "cell_type": "markdown", "id": "970c65da", "metadata": {}, "source": [ "Now let's add BAML and create our first agent with a CLI interface." ] }, { "cell_type": "markdown", "id": "976a0fca", "metadata": {}, "source": [ "In this chapter, we'll integrate BAML to create an AI agent that can respond to user input.\n", "\n", "## What is BAML?\n", "\n", "BAML (Boundary Markup Language) is a domain-specific language designed to help developers build reliable AI workflows and agents. 
Created by [BoundaryML](https://www.boundaryml.com/) (a Y Combinator W23 company), BAML adds the engineering to prompt engineering.\n", "\n", "### Why BAML?\n", "\n", "- **Type-safe outputs**: Get fully type-safe outputs from LLMs, even when streaming\n", "- **Language agnostic**: Works with Python, TypeScript, Ruby, Go, and more\n", "- **LLM agnostic**: Works with any LLM provider (OpenAI, Anthropic, etc.)\n", "- **Better performance**: State-of-the-art structured outputs that outperform even OpenAI's native function calling\n", "- **Developer-friendly**: Native VSCode extension with syntax highlighting, autocomplete, and interactive playground\n", "\n", "### Learn More\n", "\n", "- 📚 [Official Documentation](https://docs.boundaryml.com/home)\n", "- 💻 [GitHub Repository](https://github.com/BoundaryML/baml)\n", "- 🎯 [What is BAML?](https://docs.boundaryml.com/guide/introduction/what-is-baml)\n", "- 📖 [BAML Examples](https://github.com/BoundaryML/baml-examples)\n", "- 🏢 [Company Website](https://www.boundaryml.com/)\n", "- 📰 [Blog: AI Agents Need a New Syntax](https://www.boundaryml.com/blog/ai-agents-need-new-syntax)\n", "\n", "BAML turns prompt engineering into schema engineering, where you focus on defining the structure of your data rather than wrestling with prompts. This approach leads to more reliable and maintainable AI applications.\n", "\n", "### Note on Developer Experience\n", "\n", "BAML works much better in VS Code with their official extension, which provides syntax highlighting, autocomplete, inline testing, and an interactive playground. However, for this notebook tutorial, we'll work with BAML files directly without the enhanced IDE features.\n", "\n", "First, let's set up BAML support in our notebook.\n" ] }, { "cell_type": "markdown", "id": "ba1f7191", "metadata": {}, "source": [ "### BAML Setup\n", "\n", "Don't worry too much about this setup code - it will make sense later! 
For now, just know that:\n", "- BAML is a tool for working with language models\n", "- We need some special setup code to make it work nicely in Google Colab\n", "- The `get_baml_client()` function will be used to interact with AI models" ] }, { "cell_type": "code", "execution_count": null, "id": "9910f8a3", "metadata": {}, "outputs": [], "source": [ "!pip install baml-py==0.202.0 pydantic" ] }, { "cell_type": "code", "execution_count": null, "id": "a4ad6e77", "metadata": {}, "outputs": [], "source": [ "import subprocess\n", "import os\n", "\n", "# Try to import Google Colab userdata, but don't fail if not in Colab\n", "try:\n", " from google.colab import userdata\n", " IN_COLAB = True\n", "except ImportError:\n", " IN_COLAB = False\n", "\n", "def baml_generate():\n", " try:\n", " result = subprocess.run(\n", " [\"baml-cli\", \"generate\"],\n", " check=True,\n", " capture_output=True,\n", " text=True\n", " )\n", " if result.stdout:\n", " print(\"[baml-cli generate]\\n\", result.stdout)\n", " if result.stderr:\n", " print(\"[baml-cli generate]\\n\", result.stderr)\n", " except subprocess.CalledProcessError as e:\n", " msg = (\n", " f\"`baml-cli generate` failed with exit code {e.returncode}\\n\"\n", " f\"--- STDOUT ---\\n{e.stdout}\\n\"\n", " f\"--- STDERR ---\\n{e.stderr}\"\n", " )\n", " raise RuntimeError(msg) from None\n", "\n", "def get_baml_client():\n", " \"\"\"\n", " a bunch of fun jank to work around the google colab import cache\n", " \"\"\"\n", " # Set API key from Colab secrets or environment\n", " if IN_COLAB:\n", " os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n", " elif 'OPENAI_API_KEY' not in os.environ:\n", " print(\"Warning: OPENAI_API_KEY not set. 
Please set it in your environment.\")\n", " \n", " baml_generate()\n", " \n", " # Force delete all baml_client modules from sys.modules\n", " import sys\n", " modules_to_delete = [key for key in sys.modules.keys() if key.startswith('baml_client')]\n", " for module in modules_to_delete:\n", " del sys.modules[module]\n", " \n", " # Now import fresh\n", " import baml_client\n", " return baml_client.sync_client.b\n" ] }, { "cell_type": "code", "execution_count": null, "id": "b99ba982", "metadata": {}, "outputs": [], "source": [ "!baml-cli init" ] }, { "cell_type": "code", "execution_count": null, "id": "ee716f3a", "metadata": {}, "outputs": [], "source": [ "!ls baml_src" ] }, { "cell_type": "markdown", "id": "894474da", "metadata": {}, "source": [ "Now let's create our agent that will use BAML to process user input.\n", "\n", "First, we'll define the core agent logic:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "dbf9d929", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/01-agent.py\n", "import json\n", "from typing import Dict, Any, List\n", "\n", "# tool call or a respond to human tool\n", "AgentResponse = Any # This will be the return type from b.DetermineNextStep\n", "\n", "class Event:\n", " def __init__(self, type: str, data: Any):\n", " self.type = type\n", " self.data = data\n", "\n", "class Thread:\n", " def __init__(self, events: List[Dict[str, Any]]):\n", " self.events = events\n", " \n", " def serialize_for_llm(self):\n", " # can change this to whatever custom serialization you want to do, XML, etc\n", " # e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105\n", " return json.dumps(self.events)\n", "\n", "# right now this just runs one turn with the LLM, but\n", "# we'll update this function to handle all the agent logic\n", "def agent_loop(thread: Thread) -> AgentResponse:\n", " b = get_baml_client() # This will be defined by the BAML setup\n", " next_step = b.DetermineNextStep(thread.serialize_for_llm())\n", " return next_step" ] }, { "cell_type": "markdown", "id": "b9421cd4", "metadata": {}, "source": [ "Next, we need to define the BAML function that our agent will use.\n", "\n", "### Understanding BAML Syntax\n", "\n", "BAML files define:\n", "- **Classes**: Structured output schemas (like `DoneForNow` below)\n", "- **Functions**: AI-powered functions that take inputs and return structured outputs\n", "- **Tests**: Example inputs/outputs to validate your prompts\n", "\n", "This BAML file defines what our agent can do:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "58d8bda5", "metadata": {}, "outputs": [], "source": [ "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/01-agent.baml && cat baml_src/agent.baml" ] }, { "cell_type": "code", "execution_count": null, "id": "1edc5279", "metadata": {}, "outputs": [], "source": [ "!ls baml_src" ] }, { "cell_type": "markdown", "id": "ee489cc1", "metadata": {}, "source": [ "Now let's create our main function that accepts a message parameter:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "f4fea69e", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/01-main.py\n", "def main(message=\"hello from the notebook!\"):\n", " # Create a new thread with the user's message as the initial event\n", " thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n", " \n", " # Run the agent loop with the thread\n", " result = 
agent_loop(thread)\n", " print(result)" ] }, { "cell_type": "markdown", "id": "fe3fd9c7", "metadata": {}, "source": [ "Let's test our agent! Try calling main() with different messages:\n", "- `main(\"What's the weather like?\")`\n", "- `main(\"Tell me a joke\")`\n", "- `main(\"How are you doing today?\")`\n", "\n", "In this case, we'll call the `baml_generate()` function explicitly to\n", "generate the Pydantic and Python bindings from our\n", "BAML source, but in the future we'll skip this step as it\n", "is done automatically by the `get_baml_client()` function.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "7fc1ee38", "metadata": {}, "outputs": [], "source": [ "baml_generate()" ] }, { "cell_type": "code", "execution_count": null, "id": "8756df71", "metadata": {}, "outputs": [], "source": [ "main(\"Hello from the Python notebook!\")" ] }, { "cell_type": "markdown", "id": "9b5ca88c", "metadata": {}, "source": [] }, { "cell_type": "markdown", "id": "e79f4d84", "metadata": {}, "source": [ "## Chapter 2 - Add Calculator Tools" ] }, { "cell_type": "markdown", "id": "4659d5ef", "metadata": {}, "source": [ "Let's add some calculator tools to our agent."
] }, { "cell_type": "markdown", "id": "73df701a", "metadata": {}, "source": [ "Let's start by adding a tool definition for the calculator.\n", "\n", "These are simple structured outputs that we'll ask the model to\n", "return as a \"next step\" in the agentic loop.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "c538cd53", "metadata": {}, "outputs": [], "source": [ "!curl -fsSL -o baml_src/tool_calculator.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/02-tool_calculator.baml && cat baml_src/tool_calculator.baml" ] }, { "cell_type": "code", "execution_count": null, "id": "1df07ff3", "metadata": {}, "outputs": [], "source": [ "!ls baml_src" ] }, { "cell_type": "markdown", "id": "1ffe3854", "metadata": {}, "source": [ "Now, let's update the agent's DetermineNextStep method to\n", "expose the calculator tools as potential next steps.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "d6f9ee99", "metadata": {}, "outputs": [], "source": [ "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/02-agent.baml && cat baml_src/agent.baml" ] }, { "cell_type": "markdown", "id": "147bd22c", "metadata": {}, "source": [ "Now let's update our main function to show the tool call:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "f8f99089", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/02-main.py\n", "def main(message=\"hello from the notebook!\"):\n", " # Create a new thread with the user's message\n", " thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n", " \n", " # Get BAML client\n", " b = get_baml_client()\n", " \n", " # Get the next step from the agent - just show the tool call\n", " next_step = b.DetermineNextStep(thread.serialize_for_llm())\n", " \n", " # Print the raw response to show the tool call\n", " print(next_step)" ] }, { "cell_type": 
"markdown", "id": "ffb6c213", "metadata": {}, "source": [ "Let's try out the calculator! The agent should recognize that you want to perform a calculation\n", "and return the appropriate tool call instead of just a message.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "7afaa326", "metadata": {}, "outputs": [], "source": [ "main(\"can you add 3 and 4\")" ] }, { "cell_type": "markdown", "id": "599d21dd", "metadata": {}, "source": [ "## Chapter 3 - Process Tool Calls in a Loop" ] }, { "cell_type": "markdown", "id": "d80e3f9f", "metadata": {}, "source": [ "Now let's add a real agentic loop that can run the tools and get a final answer from the LLM." ] }, { "cell_type": "markdown", "id": "427fbc77", "metadata": {}, "source": [ "In this chapter, we'll enhance our agent to process tool calls in a loop. This means:\n", "- The agent can call multiple tools in sequence\n", "- Each tool result is fed back to the agent\n", "- The agent continues until it has a final answer\n", "\n", "Let's update our agent to handle tool calls properly:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ac8ae567", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/03-agent.py\n", "import json\n", "from typing import Dict, Any, List\n", "\n", "class Thread:\n", " def __init__(self, events: List[Dict[str, Any]]):\n", " self.events = events\n", " \n", " def serialize_for_llm(self):\n", " # can change this to whatever custom serialization you want to do, XML, etc\n", " # e.g. 
https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105\n", " return json.dumps(self.events)\n", "\n", "\n", "def agent_loop(thread: Thread) -> str:\n", " b = get_baml_client()\n", " \n", " while True:\n", " next_step = b.DetermineNextStep(thread.serialize_for_llm())\n", " print(\"nextStep\", next_step)\n", " \n", " if next_step.intent == \"done_for_now\":\n", " # response to human, return the next step object\n", " return next_step.message\n", " elif next_step.intent == \"add\":\n", " thread.events.append({\n", " \"type\": \"tool_call\",\n", " \"data\": next_step.__dict__\n", " })\n", " result = next_step.a + next_step.b\n", " print(\"tool_response\", result)\n", " thread.events.append({\n", " \"type\": \"tool_response\",\n", " \"data\": result\n", " })\n", " continue\n", " else:\n", " raise ValueError(f\"Unknown intent: {next_step.intent}\")" ] }, { "cell_type": "markdown", "id": "e875f4c2", "metadata": {}, "source": [ "Now let's update our main function to use the new agent loop:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "2aead128", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/03-main.py\n", "def main(message=\"hello from the notebook!\"):\n", " # Create a new thread with the user's message\n", " thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n", " \n", " # Run the agent loop with full tool handling\n", " result = agent_loop(thread)\n", " \n", " # Print the final response\n", " print(f\"\\nFinal response: {result}\")" ] }, { "cell_type": "markdown", "id": "a29bf07d", "metadata": {}, "source": [ "Let's try it out! The agent should now call the tool and return the calculated result:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "c6c6a0ca", "metadata": {}, "outputs": [], "source": [ "main(\"can you add 3 and 4\")" ] }, { "cell_type": "markdown", "id": "4c20a7d5", "metadata": {}, "source": [ "You should see the agent:\n", "1. 
Recognize it needs to use the add tool\n", "2. Call the tool with the correct parameters\n", "3. Get the result (7)\n", "4. Generate a final response incorporating the result\n", "\n", "For more complex calculations, we need to handle all calculator operations. Let's add support for subtract, multiply, and divide:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "561c0b54", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/03b-agent.py\n", "import json\n", "from typing import Dict, Any, List, Union\n", "\n", "class Thread:\n", " def __init__(self, events: List[Dict[str, Any]]):\n", " self.events = events\n", " \n", " def serialize_for_llm(self):\n", " # can change this to whatever custom serialization you want to do, XML, etc\n", " # e.g. https://github.com/got-agents/agents/blob/59ebbfa236fc376618f16ee08eb0f3bf7b698892/linear-assistant-ts/src/agent.ts#L66-L105\n", " return json.dumps(self.events)\n", "\n", "def handle_next_step(next_step, thread: Thread) -> Thread:\n", " result: float\n", " \n", " if next_step.intent == \"add\":\n", " result = next_step.a + next_step.b\n", " print(\"tool_response\", result)\n", " thread.events.append({\n", " \"type\": \"tool_response\",\n", " \"data\": result\n", " })\n", " return thread\n", " elif next_step.intent == \"subtract\":\n", " result = next_step.a - next_step.b\n", " print(\"tool_response\", result)\n", " thread.events.append({\n", " \"type\": \"tool_response\",\n", " \"data\": result\n", " })\n", " return thread\n", " elif next_step.intent == \"multiply\":\n", " result = next_step.a * next_step.b\n", " print(\"tool_response\", result)\n", " thread.events.append({\n", " \"type\": \"tool_response\",\n", " \"data\": result\n", " })\n", " return thread\n", " elif next_step.intent == \"divide\":\n", " result = next_step.a / next_step.b\n", " print(\"tool_response\", result)\n", " thread.events.append({\n", " \"type\": \"tool_response\",\n", " \"data\": result\n", " })\n", " return thread\n", "\n", 
"def agent_loop(thread: Thread) -> str:\n", " b = get_baml_client()\n", " \n", " while True:\n", " next_step = b.DetermineNextStep(thread.serialize_for_llm())\n", " print(\"nextStep\", next_step)\n", " \n", " thread.events.append({\n", " \"type\": \"tool_call\",\n", " \"data\": next_step.__dict__\n", " })\n", " \n", " if next_step.intent == \"done_for_now\":\n", " # response to human, return the next step object\n", " return next_step.message\n", " elif next_step.intent in [\"add\", \"subtract\", \"multiply\", \"divide\"]:\n", " thread = handle_next_step(next_step, thread)" ] }, { "cell_type": "markdown", "id": "7c612b06", "metadata": {}, "source": [ "Now let's test subtraction:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "4be4af22", "metadata": {}, "outputs": [], "source": [ "main(\"can you subtract 3 from 4\")" ] }, { "cell_type": "markdown", "id": "1da0ad58", "metadata": {}, "source": [ "Test multiplication:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "49d5e040", "metadata": {}, "outputs": [], "source": [ "main(\"can you multiply 3 and 4\")" ] }, { "cell_type": "markdown", "id": "d5a27929", "metadata": {}, "source": [ "Finally, let's test a complex multi-step calculation:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "431414aa", "metadata": {}, "outputs": [], "source": [ "main(\"can you multiply 3 and 4, then divide the result by 2 and then add 12 to that result\")" ] }, { "cell_type": "markdown", "id": "99ab35d5", "metadata": {}, "source": [ "Congratulations! 
You've taken your first step into hand-rolling an agent loop.\n", "\n", "Key concepts you've learned:\n", "- **Thread Management**: Tracking conversation history and tool calls\n", "- **Tool Execution**: Processing different tool types and returning results\n", "- **Agent Loop**: Continuing until the agent has a final answer\n", "\n", "From here, we'll start incorporating more intermediate and advanced concepts for 12-factor agents.\n" ] }, { "cell_type": "markdown", "id": "9ba4e319", "metadata": {}, "source": [ "## Chapter 4 - Add Tests to agent.baml" ] }, { "cell_type": "markdown", "id": "6bf77db0", "metadata": {}, "source": [ "Let's add some tests to our BAML agent." ] }, { "cell_type": "markdown", "id": "c6f0d38a", "metadata": {}, "source": [ "In this chapter, we'll learn about BAML testing - a powerful feature that helps ensure your agents behave correctly.\n", "\n", "## Why Test BAML Functions?\n", "\n", "- **Catch regressions**: Ensure changes don't break existing behavior\n", "- **Document behavior**: Tests serve as living documentation\n", "- **Validate edge cases**: Test complex scenarios and conversation flows\n", "- **CI/CD integration**: Run tests automatically in your pipeline\n", "\n", "Let's start with a simple test that checks the agent's ability to handle basic interactions:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "cd0ae03f", "metadata": {}, "outputs": [], "source": [ "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/04-agent.baml && cat baml_src/agent.baml" ] }, { "cell_type": "markdown", "id": "5bf05182", "metadata": {}, "source": [ "Run the tests to see them in action:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "30bbcac5", "metadata": {}, "outputs": [], "source": [ "!baml-cli test" ] }, { "cell_type": "markdown", "id": "2cbbf5db", "metadata": {}, "source": [ "Now let's improve the tests with assertions! 
Assertions let you verify specific properties of the agent's output.\n", "\n", "## BAML Assertion Syntax\n", "\n", "Assertions use the `@@assert` directive:\n", "```\n", "@@assert(name, {{condition}})\n", "```\n", "\n", "- `name`: A descriptive name for the assertion\n", "- `condition`: A boolean expression using `this` to access the output\n" ] }, { "cell_type": "code", "execution_count": null, "id": "dbbc5283", "metadata": {}, "outputs": [], "source": [ "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/04b-agent.baml && cat baml_src/agent.baml" ] }, { "cell_type": "markdown", "id": "ecf9cb68", "metadata": {}, "source": [ "Run the tests again to see assertions in action:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "8d0611f3", "metadata": {}, "outputs": [], "source": [ "!baml-cli test" ] }, { "cell_type": "markdown", "id": "8789e20e", "metadata": {}, "source": [ "Finally, let's add more complex test cases that test multi-step conversations.\n", "\n", "These tests simulate an entire conversation flow, including:\n", "- User input\n", "- Tool calls made by the agent\n", "- Tool responses\n", "- Final agent response\n" ] }, { "cell_type": "code", "execution_count": null, "id": "abf5be5b", "metadata": {}, "outputs": [], "source": [ "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/04c-agent.baml && cat baml_src/agent.baml" ] }, { "cell_type": "markdown", "id": "8ce0f9de", "metadata": {}, "source": [ "Run the comprehensive test suite:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "4afe82b8", "metadata": {}, "outputs": [], "source": [ "!baml-cli test" ] }, { "cell_type": "markdown", "id": "5d0ba42b", "metadata": {}, "source": [ "## Key Testing Concepts\n", "\n", "1. 
**Test Structure**: Each test specifies functions, arguments, and assertions\n", "2. **Progressive Testing**: Start simple, then test complex scenarios\n", "3. **Conversation History**: Test how the agent handles multi-turn conversations\n", "4. **Tool Integration**: Verify the agent correctly uses tools in sequence\n", "\n", "With these tests in place, you can confidently modify your agent knowing that core functionality is protected by automated tests!\n" ] }, { "cell_type": "markdown", "id": "bf15b77e", "metadata": {}, "source": [ "## Chapter 5 - Multiple Human Tools" ] }, { "cell_type": "markdown", "id": "e69dbeca", "metadata": {}, "source": [ "In this section, we'll add support for multiple tools that serve to contact humans.\n" ] }, { "cell_type": "markdown", "id": "f3e29142", "metadata": {}, "source": [ "So far, our agent only returns a final answer with \"done_for_now\". But what if the agent needs clarification?\n", "\n", "Let's add a new tool that allows the agent to request more information from the user.\n", "\n", "## Why Human-in-the-Loop?\n", "\n", "- **Handle ambiguous inputs**: When user input is unclear or contains typos\n", "- **Request missing information**: When the agent needs more context\n", "- **Confirm sensitive operations**: Before performing important actions\n", "- **Interactive workflows**: Build conversational agents that engage users\n", "\n", "First, let's update our BAML file to include a ClarificationRequest tool:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "9b42b75e", "metadata": {}, "outputs": [], "source": [ "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/05-agent.baml && cat baml_src/agent.baml" ] }, { "cell_type": "markdown", "id": "7be2af7d", "metadata": {}, "source": [ "Now let's update our agent to handle clarification requests:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "21a3f526", 
"metadata": {}, "outputs": [], "source": [ "# ./walkthrough/05-agent.py\n", "# Agent implementation with clarification support\n", "import json\n", "\n", "def agent_loop(thread, clarification_handler, max_iterations=3):\n", " \"\"\"Run the agent loop until we get a final answer (max 3 iterations).\"\"\"\n", " iteration_count = 0\n", " while iteration_count < max_iterations:\n", " iteration_count += 1\n", " print(f\"🔄 Agent loop iteration {iteration_count}/{max_iterations}\")\n", " \n", " # Get the client\n", " baml_client = get_baml_client()\n", " \n", " # Serialize the thread\n", " thread_json = json.dumps(thread.events, indent=2)\n", " \n", " # Call the agent\n", " result = baml_client.DetermineNextStep(thread_json)\n", " \n", " # Check what type of result we got based on intent\n", " if hasattr(result, 'intent'):\n", " if result.intent == 'done_for_now':\n", " return result.message\n", " elif result.intent == 'request_more_information':\n", " # Get clarification from the human\n", " clarification = clarification_handler(result.message)\n", " \n", " # Add the clarification to the thread\n", " thread.events.append({\n", " \"type\": \"clarification_request\",\n", " \"data\": result.message\n", " })\n", " thread.events.append({\n", " \"type\": \"clarification_response\",\n", " \"data\": clarification\n", " })\n", " \n", " # Continue the loop with the clarification\n", " elif result.intent in ['add', 'subtract', 'multiply', 'divide']:\n", " # Execute the appropriate tool based on intent\n", " if result.intent == 'add':\n", " result_value = result.a + result.b\n", " operation = f\"add({result.a}, {result.b})\"\n", " elif result.intent == 'subtract':\n", " result_value = result.a - result.b\n", " operation = f\"subtract({result.a}, {result.b})\"\n", " elif result.intent == 'multiply':\n", " result_value = result.a * result.b\n", " operation = f\"multiply({result.a}, {result.b})\"\n", " elif result.intent == 'divide':\n", " if result.b == 0:\n", " result_value = 
\"Error: Division by zero\"\n", " else:\n", " result_value = result.a / result.b\n", " operation = f\"divide({result.a}, {result.b})\"\n", " \n", " print(f\"🔧 Calling tool: {operation} = {result_value}\")\n", " \n", " # Add the tool call and result to the thread\n", " thread.events.append({\n", " \"type\": \"tool_call\",\n", " \"data\": {\n", " \"tool\": \"calculator\",\n", " \"operation\": operation,\n", " \"result\": result_value\n", " }\n", " })\n", " else:\n", " return \"Error: Unexpected result type\"\n", " \n", " # If we've reached max iterations without a final answer\n", " return f\"Agent reached maximum iterations ({max_iterations}) without completing the task.\"\n", "\n", "class Thread:\n", " \"\"\"Simple thread to track conversation history.\"\"\"\n", " def __init__(self, events):\n", " self.events = events" ] }, { "cell_type": "markdown", "id": "5f017c77", "metadata": {}, "source": [ "Finally, let's create a main function that handles human interaction:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "e648be92", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/05-main.py\n", "def get_human_input(prompt):\n", " \"\"\"Get input from human, handling both Colab and local environments.\"\"\"\n", " print(f\"\\n🤔 {prompt}\")\n", " \n", " if IN_COLAB:\n", " # In Colab, use actual input\n", " response = input(\"Your response: \")\n", " else:\n", " # In local testing, return a fixed response\n", " response = \"I meant to multiply 3 and 4\"\n", " print(f\"📝 [Auto-response for testing]: {response}\")\n", " \n", " return response\n", "\n", "def main(message=\"hello from the notebook!\"):\n", " # Function to handle clarification requests\n", " def handle_clarification(question):\n", " return get_human_input(f\"The agent needs clarification: {question}\")\n", " \n", " # Create a new thread with the user's message\n", " thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n", " \n", " print(f\"🚀 Starting agent with message: 
'{message}'\")\n", " \n", " # Run the agent loop\n", " result = agent_loop(thread, handle_clarification)\n", " \n", " # Print the final response\n", " print(f\"\\n✅ Final response: {result}\")" ] }, { "cell_type": "markdown", "id": "2f4b962e", "metadata": {}, "source": [ "Let's test with an ambiguous input that should trigger a clarification request:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "948684f2", "metadata": {}, "outputs": [], "source": [ "main(\"can you multiply 3 and FD*(#F&&\")" ] }, { "cell_type": "markdown", "id": "54b7d0d4", "metadata": {}, "source": [ "You should see:\n", "1. The agent recognizes the input is unclear\n", "2. It asks for clarification\n", "3. In Colab, you'll be prompted to type a response\n", "4. In local testing, an auto-response is provided\n", "5. The agent continues with the clarified input\n", "\n", "## Interactive Testing in Colab\n", "\n", "When running in Google Colab, the `input()` function will create an interactive text box where you can type your response. 
Try different clarifications to see how the agent adapts!\n", "\n", "## Key Concepts\n", "\n", "- **Human Tools**: Special tool types that return control to the human\n", "- **Conversation Flow**: The agent can pause execution to get human input\n", "- **Context Preservation**: The full conversation history is maintained\n", "- **Flexible Handling**: Different behaviors for different environments\n" ] }, { "cell_type": "markdown", "id": "253d3f6f", "metadata": {}, "source": [ "## Chapter 6 - Customize Your Prompt with Reasoning" ] }, { "cell_type": "markdown", "id": "87dc996a", "metadata": {}, "source": [ "In this section, we'll explore how to customize the prompt of the agent with reasoning steps.\n", "\n", "This is core to [factor 2 - own your prompts](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-2-own-your-prompts.md)\n" ] }, { "cell_type": "markdown", "id": "7694a842", "metadata": {}, "source": [ "## Why Add Reasoning to Prompts?\n", "\n", "Adding explicit reasoning steps to your prompts can significantly improve agent performance:\n", "\n", "- **Better decisions**: The model thinks through problems step-by-step\n", "- **Transparency**: You can see the model's thought process\n", "- **Fewer errors**: Structured thinking reduces mistakes\n", "- **Debugging**: Easier to identify where reasoning went wrong\n", "\n", "Let's update our agent prompt to include a reasoning step:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "2b38033a", "metadata": {}, "outputs": [], "source": [ "!curl -fsSL -o baml_src/agent.baml https://raw.githubusercontent.com/humanlayer/12-factor-agents/refs/heads/main/workshops/2025-07-16/./walkthrough/06-agent.baml && cat baml_src/agent.baml" ] }, { "cell_type": "markdown", "id": "30aff7de", "metadata": {}, "source": [ "Now let's test it with a simple calculation to see the reasoning in action:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "515f9755", "metadata": {}, "outputs": [], 
"source": [ "main(\"can you multiply 3 and 4\")" ] }, { "cell_type": "markdown", "id": "2f69536c", "metadata": {}, "source": [ "The model uses explicit reasoning steps to think through the problem before making a decision.\n", "\n", "## Advanced Prompt Engineering\n", "\n", "You can enhance your prompts further by:\n", "- Adding specific reasoning templates for different tasks\n", "- Including examples of good reasoning\n", "- Structuring the reasoning with numbered steps\n", "- Adding checks for common mistakes\n", "\n", "The key is to guide the model's thinking process while still allowing flexibility.\n" ] }, { "cell_type": "markdown", "id": "8274aff0", "metadata": {}, "source": [ "## Chapter 7 - Customize Your Context Window" ] }, { "cell_type": "markdown", "id": "f930c899", "metadata": {}, "source": [ "In this section, we'll explore how to customize the context window of the agent.\n", "\n", "This is core to [factor 3 - own your context window](https://github.com/humanlayer/12-factor-agents/blob/main/content/factor-3-own-your-context-window.md)\n" ] }, { "cell_type": "markdown", "id": "1d4235ed", "metadata": {}, "source": [ "## Context Window Serialization\n", "\n", "How you format your conversation history can significantly impact:\n", "- **Token usage**: Some formats are more efficient\n", "- **Model understanding**: Clear structure helps the model\n", "- **Debugging**: Readable formats help development\n", "\n", "Let's implement two serialization formats: pretty-printed JSON and XML.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "dccf9a9f", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/07-agent.py\n", "# Agent with configurable serialization formats\n", "import json\n", "\n", "class Thread:\n", " \"\"\"Thread that can serialize to different formats.\"\"\"\n", " def __init__(self, events):\n", " self.events = events\n", " \n", " def serialize_as_json(self):\n", " \"\"\"Serialize thread events to pretty-printed JSON.\"\"\"\n", " 
return json.dumps(self.events, indent=2)\n", " \n", " def serialize_as_xml(self):\n", " \"\"\"Serialize thread events to XML format for better token efficiency.\"\"\"\n", " import yaml\n", " xml_parts = [\"<thread>\"]\n", " \n", " for event in self.events:\n", " event_type = event['type']\n", " event_data = event['data']\n", " \n", " if event_type == 'user_input':\n", " xml_parts.append(f' <user_input>{event_data}</user_input>')\n", " elif event_type == 'tool_call':\n", " # Use YAML for tool call args - more compact than nested XML\n", " yaml_content = yaml.dump(event_data, default_flow_style=False).strip()\n", " xml_parts.append(f' <{event_data[\"tool\"]}>')\n", " xml_parts.append(' ' + '\\n '.join(yaml_content.split('\\n')))\n", " xml_parts.append(f' </{event_data[\"tool\"]}>')\n", " elif event_type == 'clarification_request':\n", " xml_parts.append(f' <clarification_request>{event_data}</clarification_request>')\n", " elif event_type == 'clarification_response':\n", " xml_parts.append(f' <clarification_response>{event_data}</clarification_response>')\n", " \n", " xml_parts.append(\"</thread>\")\n", " return \"\\n\".join(xml_parts)\n", "\n", "def agent_loop(thread, clarification_handler, use_xml=True):\n", " \"\"\"Run the agent loop with configurable serialization.\"\"\"\n", " while True:\n", " # Get the client\n", " baml_client = get_baml_client()\n", " \n", " # Serialize the thread based on format preference\n", " if use_xml:\n", " thread_str = thread.serialize_as_xml()\n", " print(f\"📄 Using XML serialization ({len(thread_str)} chars)\")\n", " else:\n", " thread_str = thread.serialize_as_json()\n", " print(f\"📄 Using JSON serialization ({len(thread_str)} chars)\")\n", " \n", " # Call the agent\n", " result = baml_client.DetermineNextStep(thread_str)\n", " \n", " # Check what type of result we got based on intent\n", " if hasattr(result, 'intent'):\n", " if result.intent == 'done_for_now':\n", " return result.message\n", " elif result.intent == 'request_more_information':\n", " # Get clarification from the human\n", " clarification = clarification_handler(result.message)\n", " \n", " # Add the clarification to the
thread\n", " thread.events.append({\n", " \"type\": \"clarification_request\",\n", " \"data\": result.message\n", " })\n", " thread.events.append({\n", " \"type\": \"clarification_response\",\n", " \"data\": clarification\n", " })\n", " \n", " # Continue the loop with the clarification\n", " elif result.intent in ['add', 'subtract', 'multiply', 'divide']:\n", " # Execute the appropriate tool based on intent\n", " if result.intent == 'add':\n", " result_value = result.a + result.b\n", " operation = f\"add({result.a}, {result.b})\"\n", " elif result.intent == 'subtract':\n", " result_value = result.a - result.b\n", " operation = f\"subtract({result.a}, {result.b})\"\n", " elif result.intent == 'multiply':\n", " result_value = result.a * result.b\n", " operation = f\"multiply({result.a}, {result.b})\"\n", " elif result.intent == 'divide':\n", " if result.b == 0:\n", " result_value = \"Error: Division by zero\"\n", " else:\n", " result_value = result.a / result.b\n", " operation = f\"divide({result.a}, {result.b})\"\n", " \n", " print(f\"🔧 Calling tool: {operation} = {result_value}\")\n", " \n", " # Add the tool call and result to the thread\n", " thread.events.append({\n", " \"type\": \"tool_call\",\n", " \"data\": {\n", " \"tool\": \"calculator\",\n", " \"operation\": operation,\n", " \"result\": result_value\n", " }\n", " })\n", " else:\n", " return \"Error: Unexpected result type\"" ] }, { "cell_type": "markdown", "id": "e02d1361", "metadata": {}, "source": [ "Now let's create a main function that can switch between formats:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "03c71da7", "metadata": {}, "outputs": [], "source": [ "# ./walkthrough/07-main.py\n", "def main(message=\"hello from the notebook!\", use_xml=True):\n", " # Function to handle clarification requests\n", " def handle_clarification(question):\n", " return get_human_input(f\"The agent needs clarification: {question}\")\n", " \n", " # Create a new thread with the user's message\n", " 
thread = Thread([{\"type\": \"user_input\", \"data\": message}])\n", " \n", " print(f\"🚀 Starting agent with message: '{message}'\")\n", " print(f\"📋 Using {'XML' if use_xml else 'JSON'} format for thread serialization\")\n", " \n", " # Run the agent loop with the selected serialization format\n", " result = agent_loop(thread, handle_clarification, use_xml=use_xml)\n", " \n", " # Print the final response\n", " print(f\"\\n✅ Final response: {result}\")" ] }, { "cell_type": "markdown", "id": "1d1718ab", "metadata": {}, "source": [ "Let's test with JSON format first:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "41b41a22", "metadata": {}, "outputs": [], "source": [ "main(\"can you multiply 3 and 4, then divide the result by 2\", use_xml=False)" ] }, { "cell_type": "markdown", "id": "d1bb4844", "metadata": {}, "source": [ "Now let's try the same with XML format:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "2ab2a144", "metadata": {}, "outputs": [], "source": [ "main(\"can you multiply 3 and 4, then divide the result by 2\", use_xml=True)" ] }, { "cell_type": "markdown", "id": "8883acac", "metadata": {}, "source": [ "## XML vs JSON Trade-offs\n", "\n", "**XML Benefits**:\n", "- More token-efficient for nested data\n", "- Clear hierarchy with opening/closing tags\n", "- Better for long conversations\n", "\n", "**JSON Benefits**:\n", "- Familiar to most developers\n", "- Easy to parse and debug\n", "- Native to JavaScript/Python\n", "\n", "Choose based on your specific needs and token constraints!\n" ] } ], "metadata": {}, "nbformat": 4, "nbformat_minor": 5 }